From bed1a1474ca4ffaaefbf59a64ef4b18be734f463 Mon Sep 17 00:00:00 2001 From: Tyler Perkins Date: Sun, 19 Sep 2021 18:21:27 -0400 Subject: [PATCH] Add rss_utils --- src/util/rss.cpp | 610 +++++++++++++++++++++++++++++++++++++++++++++++ src/util/rss.hpp | 123 ++++++++++ 2 files changed, 733 insertions(+) create mode 100644 src/util/rss.cpp create mode 100644 src/util/rss.hpp diff --git a/src/util/rss.cpp b/src/util/rss.cpp new file mode 100644 index 0000000..3650668 --- /dev/null +++ b/src/util/rss.cpp @@ -0,0 +1,610 @@ +/////////////////////////////////////////////////////////////////////////////// +// Tyler Perkins +// 7-23-21 +// rss implementation +// + +#include "rss.hpp" + +using namespace rss_utils; + +rss::rss() { + _uri = ""; + _ok = false; + _item_node = nullptr; +} + +rss::rss(const std::string& rss_uri, bool perf_update){ + _uri = rss_uri; + _item_node = nullptr; + _ok = false; + if(perf_update) + update(); +} + +rss::rss(const char* rss_uri, bool perf_update){ + _uri = std::string(rss_uri); + _ok = false; + _item_node = nullptr; + if(perf_update) + update(); +} + +rss::rss(const rss& rhs){ + _uri = rhs._uri; + _ok = false; + _item_node = nullptr; + update(); +} + +rss::~rss(){ + _doc.clear(); +} + +rss& rss::operator=(const rss& rhs){ + if(this != &rhs){ + _uri = rhs._uri; + _ok = false; + _item_node = nullptr; + update(); + } + return *this; +} + +rss* rss::clone() const { + rss* clone = new rss(*this); + return clone; +} + +bool rss::isOk() const { return _ok; } + +void rss::setURI(const std::string& rss_uri, bool perf_update) { + _uri = rss_uri; + _ok = false; + if(perf_update) + update(); +} + +void rss::setURI(const char* rss_uri, bool perf_update) { + _uri = std::string(rss_uri); + _ok = false; + if(perf_update) + update(); +} + +std::string rss::getURI() const { return _uri; } + +bool rss::update() { + std::string result; + + if(_uri == "-"){ + std::getline(std::cin, result); + return parse(result); + } else { + CURL *curl; + CURLcode res; + curl = curl_easy_init(); + + if(curl){ + curl_easy_setopt(curl, CURLOPT_URL, _uri.c_str()); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_utils::write_to_string); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &result); + res = curl_easy_perform(curl); + curl_easy_cleanup(curl); + + if(res == CURLE_OK){ + return parse(result); + } else { + std::cerr << "curl_easy_perform(curl) failed: " + << curl_easy_strerror(res) << std::endl; + } + } + _ok = false; + return false; + } +} + +std::string rss::getTitle() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("title"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string rss::getLink() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("link"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string rss::getDescription() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("description"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string rss::getLanguage() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("language"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string rss::getWebMaster() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("webMaster"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string rss::getCopyright() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("copyright"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string rss::getPubDate() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("pubDate"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); + +} + +std::string rss::getManagingEditor() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("managingEditor"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string rss::getGenerator() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("generator"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string rss::getDocs() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("docs"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string rss::getTTL() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("ttl"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); + +} + +std::string rss::getLastBuildDate() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("lastBuildDate"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string rss::getImageURL() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("image"); + if(tmp == 0) + return ""; + tmp = tmp->first_node("url"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string rss::getImageTitle() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("image"); + if(tmp == 0) + return ""; + tmp = tmp->first_node("title"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string rss::getImageLink() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("image"); + if(tmp == 0) + return ""; + tmp = tmp->first_node("link"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +int rss::getImageWidth() const { + if(!_ok) + return 0; + + rapidxml::xml_node<> *tmp = _item_node->first_node("image"); + if(tmp == 0) + return 0; + tmp = tmp->first_node("width"); + if(tmp == 0) + return 0; + return atoi(tmp->value()); +} + +int rss::getImageHeight() const { + if(!_ok) + return 0; + + rapidxml::xml_node<> *tmp = _item_node->first_node("image"); + if(tmp == 0) + return 0; + tmp = tmp->first_node("height"); + if(tmp == 0) + return 0; + return atoi(tmp->value()); +} + +std::string rss::getCloudDomain() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("cloud"); + if(tmp == 0) + return ""; + rapidxml::xml_attribute<> *attr = tmp->first_attribute("domain"); + if(attr == 0) + return ""; + + return attr->value(); +} + +int rss::getCloudPort() const { + if(!_ok) + return 0; + + rapidxml::xml_node<> *tmp = _item_node->first_node("cloud"); + if(tmp == 0) + return 0; + rapidxml::xml_attribute<> *attr = tmp->first_attribute("port"); + if(attr == 0) + return 0; + + return atoi(attr->value()); +} + +std::string rss::getCloudPath() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("cloud"); + if(tmp == 0) + return ""; + rapidxml::xml_attribute<> *attr = tmp->first_attribute("path"); + if(attr == 0) + return ""; + + return attr->value(); +} + +std::string rss::getCloudRegisterProcedure() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("cloud"); + if(tmp == 0) + return ""; + rapidxml::xml_attribute<> *attr = tmp->first_attribute("registerProcedure"); + if(attr == 0) + return ""; + + return attr->value(); +} + +std::string rss::getCloudProtocol() const { + if(!_ok) + return ""; + + rapidxml::xml_node<> *tmp = _item_node->first_node("cloud"); + if(tmp == 0) + return ""; + rapidxml::xml_attribute<> *attr = tmp->first_attribute("protocol"); + if(attr == 0) + return ""; + + return attr->value(); +} + +int rss::getItemCount() const { + if(!_ok) + return -1; + + return _items.size(); +} + +std::vector rss::getItems() { + if(!_ok) + return std::vector(); + + if(!_items.empty()) + return _items; + + //items is empty, have not processed the items yet + std::vector*> items; + items = parseItems(); + for(auto it = items.begin(); it != items.end(); ++it){ + item i(*it); + _items.push_back(i); + } + + return _items; +} + +item& rss::getItem(const int index) { + getItems(); + return _items[index]; +} + +const item& rss::getItem(const int index) const { + return _items[index]; +} + +item& rss::operator[](const int index) { + return getItem(index); +} + +const item& rss::operator[](const int index) const { + return getItem(index); +} + +std::vector*> rss::parseItems() { + std::vector*> items; + + if(!_ok) + return items; + + rapidxml::xml_node<> *first_item = _item_node->first_node("item"); + //there are no items, return empty vector + if(first_item == 0) + return items; + + for(; first_item != NULL; first_item = first_item->next_sibling()) + items.push_back(first_item); + + return items; +} + +bool rss::parse(const std::string& rss_str){ + _ok = true; + + //parse, check for errors in xml doc + try { + _doc.parse<0>(_doc.allocate_string(rss_str.c_str())); + } catch (const std::runtime_error& e){ + std::cerr << "rss::parse() runtime error: " << e.what() << std::endl; + _ok = false; + } catch (const rapidxml::parse_error& e){ + std::cerr << "rss::parse() error with file: " << e.what() << std::endl; + _ok = false; + } + + //verify that it is an rss file + rapidxml::xml_node<> *tmp = _doc.first_node("rss"); + if(tmp == NULL){ + std::cerr << "rss::parse() error with file: XML is not an rss feed!" << std::endl; + _ok = false; + } else { + tmp = tmp->first_node("channel"); + if(tmp == NULL){ + std::cerr << "rss::parse() error with file: RSS feed has no channels!" + << std::endl; + _ok = false; + } + } + + if(_ok){ + _item_node = tmp; + } + return _ok; +} + +std::string rss::cdata_to_string(const rapidxml::xml_node<>* node) const{ + //this will dig till were past the cdata + const rapidxml::xml_node<>* tmp = node; + while(tmp->value()[0] == '\0'){ //if string is empty + tmp = tmp->first_node(); + if(tmp == NULL) + return ""; + } + return tmp->value(); +} + +size_t rss_utils::write_to_string(void* ptr, size_t size, size_t nmemb, std::string* s){ + s->append(static_cast(ptr), size * nmemb); + return size * nmemb; +} + +std::string item::cdata_to_string(const rapidxml::xml_node<>* node) const{ + //this will dig till were past the cdata + const rapidxml::xml_node<>* tmp = node; + while(tmp->value()[0] == '\0'){ //if string is empty + tmp = tmp->first_node(); + if(tmp == NULL) + return ""; + } + return tmp->value(); +} + +item::item() {} +item::item(rapidxml::xml_node<>* node){ + _item = node; +} + +item::item(const item& rhs){ + _item = rhs._item; +} + +item::~item() {} + +item& item::operator=(const item& rhs){ + if(this != &rhs){ + _item = rhs._item; + } + return *this; +} + +item* item::clone() const { + item* clone = new item(*this); + return clone; +} + +std::string item::getTitle() const { + rapidxml::xml_node<>* tmp = _item->first_node("title"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string item::getLink() const { + rapidxml::xml_node<>* tmp = _item->first_node("link"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string item::getDescription() const { + rapidxml::xml_node<>* tmp = _item->first_node("description"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string item::getAuthor() const { + rapidxml::xml_node<>* tmp = _item->first_node("author"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string item::getCategory() const { + rapidxml::xml_node<>* tmp = _item->first_node("category"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string item::getComments() const { + rapidxml::xml_node<>* tmp = _item->first_node("comments"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string item::getGuid() const { + rapidxml::xml_node<>* tmp = _item->first_node("guid"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +bool item::getGuidPermaLink() const { + rapidxml::xml_node<>* tmp = _item->first_node("guid"); + if(tmp == 0) + return false; + rapidxml::xml_attribute<>* attr = tmp->first_attribute("isPermaLink"); + if(attr == 0) + return false; + else if(attr->value() == std::string("true")) + return true; + return false; +} + +std::string item::getPubDate() const { + rapidxml::xml_node<>* tmp = _item->first_node("pubDate"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string item::getSource() const { + rapidxml::xml_node<>* tmp = _item->first_node("source"); + if(tmp == 0) + return ""; + return cdata_to_string(tmp); +} + +std::string item::getEnclosureURL() const { + rapidxml::xml_node<>* tmp = _item->first_node("enclosure"); + if(tmp == 0) + return ""; + rapidxml::xml_attribute<>* attr = tmp->first_attribute("url"); + if(attr == 0) + return ""; + return attr->value(); +} + +std::string item::getEnclosureType() const { + rapidxml::xml_node<>* tmp = _item->first_node("enclosure"); + if(tmp == 0) + return ""; + rapidxml::xml_attribute<>* attr = tmp->first_attribute("type"); + if(attr == 0) + return ""; + return attr->value(); +} + +int item::getEnclosureLength() const { + rapidxml::xml_node<>* tmp = _item->first_node("enclosure"); + if(tmp == 0) + return -1; + rapidxml::xml_attribute<>* attr = tmp->first_attribute("length"); + if(attr == 0) + return -1; + return atoi(attr->value()); +} diff --git a/src/util/rss.hpp b/src/util/rss.hpp new file mode 100644 index 0000000..32d1f20 --- /dev/null +++ b/src/util/rss.hpp @@ -0,0 +1,123 @@ +/////////////////////////////////////////////////////////////////////////////// +// Tyler Perkins +// 7-23-21 +// rss definition +// + +#pragma once + +#if _TESTS_ == 1 +#include "config.hpp" +#endif + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace rss_utils { + typedef std::vector> item_map; + class item; + class rss { + public: + rss(); + rss(const std::string&, bool = true); + rss(const char*, bool = true); + rss(const rss&); + ~rss(); + rss& operator=(const rss&); + rss* clone() const; + + bool isOk() const; + + void setURI(const std::string&, bool = true); + void setURI(const char*, bool = true); + std::string getURI() const; + + bool update(); + + std::string getTitle() const; + std::string getLink() const; + std::string getDescription() const; + std::string getLanguage() const; + std::string getWebMaster() const; + std::string getCopyright() const; + std::string getPubDate() const; + std::string getManagingEditor() const; + std::string getGenerator() const; + std::string getDocs() const; + std::string getTTL() const; + std::string getLastBuildDate() const; + + std::string getImageURL() const; + std::string getImageTitle() const; + std::string getImageLink() const; + int getImageWidth() const; + int getImageHeight() const; + + std::string getCloudDomain() const; + int getCloudPort() const; + std::string getCloudPath() const; + std::string getCloudRegisterProcedure() const; + std::string getCloudProtocol() const; + + int getItemCount() const; + std::vector getItems(); + + item& getItem(const int); + const item& getItem(const int) const; + item& operator[](const int); + const item& operator[](const int) const; + + + private: + bool parse(const std::string&); + std::vector*> parseItems(); + std::string cdata_to_string(const rapidxml::xml_node<>*) const; + + bool _ok; + std::string _uri; + rapidxml::xml_node<> *_item_node; + rapidxml::xml_document<> _doc; + std::vector _items; + }; + + class item { + public: + item(); + item(rapidxml::xml_node<>*); + item(const item&); + ~item(); + item& operator=(const item&); + item* clone() const; + + std::string getTitle() const; + std::string getLink() const; + std::string getDescription() const; + std::string getAuthor() const; + std::string getCategory() const; + std::string getComments() const; + std::string getGuid() const; + bool getGuidPermaLink() const; + std::string getPubDate() const; + std::string getSource() const; + + std::string getEnclosureURL() const; + std::string getEnclosureType() const; + int getEnclosureLength() const; + + + private: + std::string cdata_to_string(const rapidxml::xml_node<>*) const; + + rapidxml::xml_node<> *_item; + }; + + size_t write_to_string(void*, size_t, size_t, std::string*); +}