From e6c971332750c96ca1ac4b316efe5b71ad225895 Mon Sep 17 00:00:00 2001
From: Clortox
Date: Fri, 23 Jul 2021 21:17:22 -0400
Subject: [PATCH] Add rss object and parsing

---
 src/config.hpp |  13 +++
 src/main.cpp   |  49 +++++++++++
 src/rss.cpp    | 210 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/rss.hpp    |  82 +++++++++++++++++++
 src/tests.cpp  |  90 +++++++++++++++++++++
 src/tests.hpp  |  18 ++++
 6 files changed, 462 insertions(+)
 create mode 100644 src/config.hpp
 create mode 100644 src/main.cpp
 create mode 100644 src/rss.cpp
 create mode 100644 src/rss.hpp
 create mode 100644 src/tests.cpp
 create mode 100644 src/tests.hpp

diff --git a/src/config.hpp b/src/config.hpp
new file mode 100644
index 0000000..0c6920f
--- /dev/null
+++ b/src/config.hpp
@@ -0,0 +1,13 @@
+///////////////////////////////////////////////////////////////////////////////
+// Tyler Perkins
+// 7-23-21
+// config files
+//
+
+#pragma once
+
+/* Tests:
+ * Set to 1 to compile and run the test suite
+ */
+
+#define _TESTS_ 1
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..553cad3
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,49 @@
+///////////////////////////////////////////////////////////////////////////////
+// Tyler Perkins
+// 7-23-21
+// entry point
+//
+
+#include <iostream>
+#include "config.hpp"
+#include "rss.hpp"
+
+#if _TESTS_ == 1
+#include "tests.hpp"
+#endif
+
+// Print the command line usage summary to stdout.
+// progName is the name shown in the usage line, normally argv[0].
+void help(const char* progName){
+    std::cout << "Usage: " << progName << " [-u FEED_URI]\n";
+    std::cout << "Options:\n";
+    std::cout << " [-u, --uri] URI of the rss stream\n\n";
+    std::cout << " [-t, --title] Get title of channel\n";
+    std::cout << " [-l, --link] Get link to channel\n";
+    std::cout << " [-d, --description] Get Description of channel\n";
+    std::cout << " [-e, --language] Get language code of channel\n";
+    std::cout << " [-m, --webmaster] Get webMaster's email\n";
+    std::cout << " [-c, --copyright] Get copyright\n";
+    std::cout << " [-p, --pubdate] Get publishing date\n";
+    std::cout << " [-q, --managingeditor] Get Managing Editor\n";
+    std::cout << " [-g, --generator] Get generator of this feed\n";
+    std::cout << " [-o, --docs] Get link to RSS documentation\n";
+    std::cout << " [-w, --ttl] Get ttl, time that channel can be\n";
+    std::cout << " cached before being updated\n";
+    std::cout << " [-b, --builddate] Get last time the channel's\n";
+    std::cout << " content changed\n";
+}
+
+// Entry point. With _TESTS_ set to 1 the test suite runs and the program
+// exits; otherwise the usage text is printed, as no options are parsed yet.
+int main(int argc, char** argv) {
+#if _TESTS_ == 1
+    (void)argc;
+    (void)argv;
+    rss_utils::testRSS();
+    return 0;
+#else
+    help(argc > 0 ? argv[0] : "rss");
+    return 0;
+#endif
+}
diff --git a/src/rss.cpp b/src/rss.cpp
new file mode 100644
index 0000000..1865c1f
--- /dev/null
+++ b/src/rss.cpp
@@ -0,0 +1,210 @@
+///////////////////////////////////////////////////////////////////////////////
+// Tyler Perkins
+// 7-23-21
+// rss implementation
+//
+
+#include "rss.hpp"
+
+using namespace rss_utils;
+
+// Create an empty feed: no URI, nothing fetched, isOk() is false.
+rss::rss() : ok(false), uri(), item_node(nullptr) {}
+
+// Create a feed for rss_uri; fetch and parse it right away when perf_update
+// is true.
+rss::rss(const std::string& rss_uri, bool perf_update)
+    : ok(false), uri(rss_uri), item_node(nullptr) {
+    if(perf_update)
+        update();
+}
+
+// Same as above for a C string. A null pointer is treated as an empty URI
+// because constructing std::string from nullptr is undefined behaviour.
+rss::rss(const char* rss_uri, bool perf_update)
+    : ok(false), uri(rss_uri != nullptr ? rss_uri : ""), item_node(nullptr) {
+    if(perf_update)
+        update();
+}
+
+// Copy only the URI and fetch the feed again: item_node points into
+// rhs.doc, so the parsed data cannot be shared with the copy.
+rss::rss(const rss& rhs) : ok(false), uri(rhs.uri), item_node(nullptr) {
+    update();
+}
+
+rss::~rss(){
+    doc.clear();
+}
+
+// Copy the URI from rhs and fetch the feed again (see the copy constructor).
+rss& rss::operator=(const rss& rhs){
+    if(this != &rhs){
+        uri = rhs.uri;
+        update();
+    }
+    return *this;
+}
+
+// Heap-allocated copy of this feed. The caller owns it and must delete it.
+rss* rss::clone() const {
+    return new rss(*this);
+}
+
+bool rss::isOk() const { return ok; }
+
+// Point the feed at rss_uri. Previously parsed data is invalidated; it is
+// fetched again right away when perf_update is true.
+void rss::setURI(const std::string& rss_uri, bool perf_update) {
+    uri = rss_uri;
+    ok = false;
+    item_node = nullptr;
+    if(perf_update)
+        update();
+}
+
+// Same as above for a C string; a null pointer is treated as an empty URI.
+void rss::setURI(const char* rss_uri, bool perf_update) {
+    setURI(std::string(rss_uri != nullptr ? rss_uri : ""), perf_update);
+}
+
+std::string rss::getURI() const { return uri; }
+
+// Download the document at uri with libcurl and parse it.
+// Returns true when the download and the parse both succeeded; errors are
+// reported on std::cerr.
+bool rss::update() {
+    // Whatever was parsed before is stale from here on
+    ok = false;
+    item_node = nullptr;
+
+    CURL *curl = curl_easy_init();
+    if(curl == nullptr){
+        std::cerr << "curl_easy_init() failed" << std::endl;
+        return false;
+    }
+
+    std::string result;
+    curl_easy_setopt(curl, CURLOPT_URL, uri.c_str());
+    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_utils::write_to_string);
+    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &result);
+    const CURLcode res = curl_easy_perform(curl);
+    curl_easy_cleanup(curl);
+
+    if(res != CURLE_OK){
+        std::cerr << "curl_easy_perform(curl) failed: "
+            << curl_easy_strerror(res) << std::endl;
+        return false;
+    }
+    return parse(result);
+}
+
+// Text of the direct child of <channel> called name.
+// Returns "" when the feed is not ok or the channel has no such element.
+std::string rss::channelValue(const char* name) const {
+    if(!ok || item_node == nullptr)
+        return "";
+
+    const rapidxml::xml_node<> *tmp = item_node->first_node(name);
+    if(tmp == nullptr)
+        return "";
+    return tmp->value();
+}
+
+std::string rss::getTitle() const { return channelValue("title"); }
+
+std::string rss::getLink() const { return channelValue("link"); }
+
+std::string rss::getDescription() const { return channelValue("description"); }
+
+std::string rss::getLanguage() const { return channelValue("language"); }
+
+std::string rss::getWebMaster() const { return channelValue("webMaster"); }
+
+std::string rss::getCopyright() const { return channelValue("copyright"); }
+
+std::string rss::getPubDate() const { return channelValue("pubDate"); }
+
+// The five getters below were declared in rss.hpp but had no definition
+std::string rss::getManagingEditor() const {
+    return channelValue("managingEditor");
+}
+
+std::string rss::getGenerator() const { return channelValue("generator"); }
+
+std::string rss::getDocs() const { return channelValue("docs"); }
+
+std::string rss::getTTL() const { return channelValue("ttl"); }
+
+std::string rss::getLastBuildDate() const {
+    return channelValue("lastBuildDate");
+}
+
+// Collect every <item> of the channel. Each item becomes a map from child
+// element name to its text; the result is empty when the feed is not ok.
+std::vector<std::map<std::string, std::string>> rss::getItems() const {
+    std::vector<std::map<std::string, std::string>> items;
+
+    if(!ok || item_node == nullptr)
+        return items;
+
+    // Ask for "item" siblings only, so other channel elements that follow an
+    // item (an <image>, for example) are not reported as items
+    for(rapidxml::xml_node<> *item = item_node->first_node("item");
+            item != nullptr; item = item->next_sibling("item")){
+        std::map<std::string, std::string> tmp_item;
+        for(rapidxml::xml_node<> *item_val = item->first_node();
+                item_val != nullptr; item_val = item_val->next_sibling())
+            tmp_item[item_val->name()] = item_val->value();
+        items.push_back(tmp_item);
+    }
+
+    return items;
+}
+
+// Parse rss_str and locate the <channel> element of the <rss> root.
+// Returns true and sets item_node on success; on any failure the error is
+// reported on std::cerr and the feed is left not ok.
+bool rss::parse(const std::string& rss_str){
+    // Release the nodes and strings of any earlier parse; rapidxml keeps them
+    // in the document's memory pool until clear() is called
+    doc.clear();
+    item_node = nullptr;
+    ok = false;
+
+    //parse, check for errors in xml doc
+    try {
+        doc.parse<0>(doc.allocate_string(rss_str.c_str()));
+    } catch (const std::runtime_error& e){
+        std::cerr << "rss::parse() runtime error: " << e.what() << std::endl;
+        return false;
+    } catch (const rapidxml::parse_error& e){
+        std::cerr << "rss::parse() error with file: " << e.what() << std::endl;
+        return false;
+    }
+
+    //verify that it is an rss file
+    rapidxml::xml_node<> *tmp = doc.first_node("rss");
+    if(tmp == nullptr){
+        std::cerr << "rss::parse() error with file: XML is not an rss feed!" << std::endl;
+        return false;
+    }
+
+    tmp = tmp->first_node("channel");
+    if(tmp == nullptr){
+        std::cerr << "rss::parse() error with file: RSS feed has no channels!"
+            << std::endl;
+        return false;
+    }
+
+    item_node = tmp;
+    ok = true;
+    return true;
+}
+
+// libcurl write callback: append the size * nmemb bytes at ptr to *s and
+// return the number of bytes handled so libcurl keeps going.
+size_t rss_utils::write_to_string(void* ptr, size_t size, size_t nmemb, std::string* s){
+    s->append(static_cast<char*>(ptr), size * nmemb);
+    return size * nmemb;
+}
diff --git a/src/rss.hpp b/src/rss.hpp
new file mode 100644
index 0000000..aa31f37
--- /dev/null
+++ b/src/rss.hpp
@@ -0,0 +1,82 @@
+///////////////////////////////////////////////////////////////////////////////
+// Tyler Perkins
+// 7-23-21
+// rss definition
+//
+
+#pragma once
+
+#include "config.hpp"
+
+// NOTE(review): the include targets below were reconstructed from the names
+// this header and rss.cpp use; confirm the rapidxml path for your install.
+#include <iostream>
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <curl/curl.h>
+#include <rapidxml/rapidxml.hpp>
+
+namespace rss_utils {
+    // List of feed items; each item maps a child element name to its text
+    typedef std::vector<std::map<std::string, std::string>> item_map;
+
+    // Downloads an RSS feed with libcurl and exposes its channel fields and
+    // items. Getters return "" when the feed is not ok or the element is
+    // absent.
+    class rss {
+    public:
+        rss();
+        rss(const std::string&, bool = true);
+        rss(const char*, bool = true);
+        rss(const rss&);
+        ~rss();
+        rss& operator=(const rss&);
+        // Caller owns the returned object and must delete it
+        rss* clone() const;
+
+        bool isOk() const;
+
+        void setURI(const std::string&, bool = true);
+        void setURI(const char*, bool = true);
+        std::string getURI() const;
+
+        // Fetch and parse the feed at the current URI; returns isOk()
+        bool update();
+
+        std::string getTitle() const;
+        std::string getLink() const;
+        std::string getDescription() const;
+        std::string getLanguage() const;
+        std::string getWebMaster() const;
+        std::string getCopyright() const;
+        std::string getPubDate() const;
+        std::string getManagingEditor() const;
+        std::string getGenerator() const;
+        std::string getDocs() const;
+        std::string getTTL() const;
+        std::string getLastBuildDate() const;
+
+        //TODO
+        //std::string getImage() const;
+        //std::string getCloud() const;
+
+        std::vector<std::map<std::string, std::string>> getItems() const;
+
+    private:
+        bool parse(const std::string&);
+        // Text of the named direct child of <channel>, or "" if unavailable
+        std::string channelValue(const char*) const;
+
+        bool ok;
+        std::string uri;
+        rapidxml::xml_node<> *item_node;
+        rapidxml::xml_document<> doc;
+    };
+
+    // libcurl write callback: appends size * nmemb bytes from the first
+    // argument to the string and returns the number of bytes consumed
+    size_t write_to_string(void*, size_t, size_t, std::string*);
+}
diff --git a/src/tests.cpp b/src/tests.cpp
new file mode 100644
index 0000000..e410e02
--- /dev/null
+++ b/src/tests.cpp
@@ -0,0 +1,90 @@
+///////////////////////////////////////////////////////////////////////////////
+// Tyler Perkins
+// 7-23-21
+// tests implementation
+//
+
+#include "tests.hpp"
+
+using namespace rss_utils;
+
+// Exercise construction, copying, cloning, setURI and getItems of rss.
+// Every check fetches over the network; a failed check aborts via assert.
+void rss_utils::testRSS() {
+    std::cout << "===== test RSS class =====" << std::endl;
+    std::string url1 = "https://www.feedforall.com/sample-feed.xml";
+    std::string bad_url = "https://www.not_a_real_link.com/";
+
+    //test default constructor
+    {
+        rss_utils::rss feed;
+        assert(feed.getURI() == "");
+        assert(feed.isOk() == false);
+        std::cout << "def cont tests passed!" << std::endl;
+    }
+    {
+        rss_utils::rss feed(url1);
+        assert(feed.getURI() == url1);
+        assert(feed.isOk() == true);
+        std::cout << "std::string& cont tests passed!" << std::endl;
+    }
+    {
+        rss_utils::rss feed(url1.c_str());
+        assert(feed.getURI() == url1);
+        assert(feed.isOk() == true);
+        std::cout << "char* cont tests passed!" << std::endl;
+    }
+    {
+        rss_utils::rss feed(url1);
+        rss_utils::rss feed2(feed);
+        assert(feed2.getURI() == url1);
+        assert(feed2.isOk() == true);
+        std::cout << "copy cont tests passed!" << std::endl;
+    }
+    {
+        rss_utils::rss feed(url1);
+        //declare first, then assign: "rss feed2 = feed;" would call the
+        //copy constructor and leave operator= untested
+        rss_utils::rss feed2;
+        feed2 = feed;
+        assert(feed2.getURI() == url1);
+        assert(feed2.isOk() == true);
+        std::cout << "assignment operator tests passed!" << std::endl;
+    }
+    {
+        rss_utils::rss feed(url1);
+        rss_utils::rss* feed2 = feed.clone();
+        assert(feed2->getURI() == url1);
+        assert(feed2->isOk() == true);
+        //clone() hands ownership to the caller
+        delete feed2;
+        std::cout << "clone functions tests passed!" << std::endl;
+    }
+    {
+        rss_utils::rss feed(url1);
+        feed.setURI(bad_url);
+        assert(feed.getURI() == bad_url);
+        assert(feed.isOk() == false);
+        std::cout << "std::string& setURI tests passed!" << std::endl;
+    }
+    {
+        rss_utils::rss feed;
+        feed.setURI(bad_url.c_str());
+        assert(feed.getURI() == bad_url);
+        assert(feed.isOk() == false);
+        std::cout << "char* setURI tests passed!" << std::endl;
+    }
+    {
+        rss_utils::rss feed(url1);
+        rss_utils::item_map items = feed.getItems();
+        for(auto it = items.begin(); it != items.end(); ++it){
+            std::cout << "=== Item ===" << std::endl;
+            std::cout << "Item: " << (*it)["title"] << std::endl;
+            std::cout << "Description: " << (*it)["description"] << std::endl;
+            std::cout << "Link: " << (*it)["link"] << std::endl;
+            std::cout << "pubDate: " << (*it)["pubDate"] << std::endl;
+        }
+        std::cout << "=== Done with Items ===" << std::endl;
+    }
+    std::cout << "===== test RSS class =====" << std::endl;
+}
diff --git a/src/tests.hpp b/src/tests.hpp
new file mode 100644
index 0000000..6b95935
--- /dev/null
+++ b/src/tests.hpp
@@ -0,0 +1,18 @@
+///////////////////////////////////////////////////////////////////////////////
+// Tyler Perkins
+// 7-23-21
+// tests definition
+//
+
+#pragma once
+
+#include <cassert>
+#include <iostream>
+#include <string>
+#include "rss.hpp"
+
+namespace rss_utils {
+    // Run the rss class checks, printing progress to stdout. The checks
+    // download the URLs hard-coded in tests.cpp, so network access is needed.
+    void testRSS();
+}