Add rss object and parsing

This commit is contained in:
Clortox 2021-07-23 21:17:22 -04:00
parent 7f2da61313
commit e6c9713327
6 changed files with 464 additions and 0 deletions

13
src/config.hpp Normal file
View File

@ -0,0 +1,13 @@
///////////////////////////////////////////////////////////////////////////////
// Tyler Perkins
// 7-23-21
// config files
//
#pragma once
/* Tests:
* Set to 1 to compile and run the test suite
*
* When this is 1, main.cpp includes tests.hpp and main() calls
* rss_utils::testRSS() and then returns.
*
* NOTE(review): identifiers that begin with an underscore followed by an
* uppercase letter are reserved for the implementation in C++; consider
* renaming (e.g. RSS_ENABLE_TESTS) together with every use site.
*/
#define _TESTS_ 1

47
src/main.cpp Normal file
View File

@ -0,0 +1,47 @@
///////////////////////////////////////////////////////////////////////////////
// Tyler Perkins
// 7-23-21
// entry point
//
#include <iostream>
#include "config.hpp"
#include "rss.hpp"
#if _TESTS_ == 1
#include "tests.hpp"
#endif
// Prints the command-line usage summary and the option list to standard
// output.
//   progName: program name shown on the usage line (normally argv[0]). A null
//             pointer is replaced by a placeholder so the stream stays usable.
void help(char* progName){
    // Streaming a null char* would put std::cout into a failed state and
    // silently drop everything printed afterwards.
    const char* shownName = (progName != nullptr) ? progName : "<program>";

    // The leading space keeps the program name and the option synopsis apart.
    std::cout << "Usage: " << shownName << " [-u FEED_URI]\n";
    std::cout << "Options:\n";
    std::cout << " [-u, --uri] URI of the rss stream\n\n";
    std::cout << " [-t, --title] Get title of channel\n";
    std::cout << " [-l, --link] Get link to channel\n";
    std::cout << " [-d, --description] Get Description of channel\n";
    std::cout << " [-e, --language] Get language code of channel\n";
    std::cout << " [-m, --webmaster] Get webMaster's email\n";
    std::cout << " [-c, --copyright] Get copyright\n";
    std::cout << " [-p, --pubdate] Get publishing date\n";
    std::cout << " [-q, --managingeditor] Get Managing Editor\n";
    std::cout << " [-g, --generator] Get generator of this feed\n";
    std::cout << " [-o, --docs] Get link to RSS documentation\n";
    std::cout << " [-w, --ttl] Get ttl, time that channel can be\n";
    std::cout << " cached before being updated\n";
    std::cout << " [-b, --builddate] Get last time the channel's\n";
    std::cout << " content changed\n";
}
// Program entry point. When _TESTS_ is 1 the RSS test suite is run; in every
// configuration the process exits with status 0.
int main(int argc, char** argv) {
    // The command-line arguments are not consumed yet.
    static_cast<void>(argc);
    static_cast<void>(argv);
#if _TESTS_ == 1
    rss_utils::testRSS();
#endif
    return 0;
}

232
src/rss.cpp Normal file
View File

@ -0,0 +1,232 @@
///////////////////////////////////////////////////////////////////////////////
// Tyler Perkins
// 7-23-21
// rss implementation
//
#include "rss.hpp"
using namespace rss_utils;
// Builds an empty feed: empty URI, not ok, and no channel node selected.
rss::rss()
    : ok(false), uri(""), item_node(nullptr) {
}
// Builds a feed for rss_uri. The feed starts out not ok; when perf_update is
// true, update() is called immediately to fetch and parse it.
rss::rss(const std::string& rss_uri, bool perf_update)
    : ok(false), uri(rss_uri), item_node(nullptr) {
    if(perf_update){
        update();
    }
}
// C-string overload: stores rss_uri as the feed URI. The feed starts out not
// ok; when perf_update is true, update() is called immediately.
rss::rss(const char* rss_uri, bool perf_update)
    : ok(false), uri(rss_uri), item_node(nullptr) {
    if(perf_update){
        update();
    }
}
// Copy constructor. Only the URI is taken from rhs; the parsed document is
// not copied. Instead update() is called so this object fetches and parses
// the feed itself.
rss::rss(const rss& rhs)
    : ok(false), uri(rhs.uri), item_node(nullptr) {
    update();
}
// Clears the parsed XML document when the feed is destroyed.
rss::~rss(){
    this->doc.clear();
}
// Copy assignment. Takes over rhs's URI, resets the parse state and calls
// update() so this object fetches and parses the feed itself.
// Self-assignment is a no-op.
rss& rss::operator=(const rss& rhs){
    if(this == &rhs)
        return *this;

    uri = rhs.uri;
    item_node = nullptr;
    ok = false;
    update();
    return *this;
}
// Returns a heap-allocated copy of this feed made with the copy constructor.
// The object is created with new, so the caller must delete it.
rss* rss::clone() const {
    return new rss(*this);
}
// Reports the ok flag, which parse() sets to true only after finding an
// <rss> root with a <channel> child.
bool rss::isOk() const {
    return ok;
}
// Replaces the feed URI and marks the feed as not ok. When perf_update is
// true, update() is called right away to fetch and parse the new URI.
void rss::setURI(const std::string& rss_uri, bool perf_update) {
    ok = false;
    uri = rss_uri;
    if(!perf_update)
        return;
    update();
}
// C-string overload: replaces the feed URI and marks the feed as not ok. When
// perf_update is true, update() is called right away.
void rss::setURI(const char* rss_uri, bool perf_update) {
    ok = false;
    uri = std::string(rss_uri);
    if(!perf_update)
        return;
    update();
}
// Returns a copy of the URI this feed is currently configured with.
std::string rss::getURI() const {
    return uri;
}
bool rss::update() {
std::string result;
CURL *curl;
CURLcode res;
curl = curl_easy_init();
if(curl){
curl_easy_setopt(curl, CURLOPT_URL, uri.c_str());
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_utils::write_to_string);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &result);
res = curl_easy_perform(curl);
curl_easy_cleanup(curl);
if(res == CURLE_OK){
return parse(result);
} else {
std::cerr << "curl_easy_perform(curl) failed: "
<< curl_easy_strerror(res) << std::endl;
}
}
ok = false;
return false;
}
std::string rss::getTitle() const {
if(!ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("title");
if(tmp == 0)
return "";
return tmp->value();
}
std::string rss::getLink() const {
if(!ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("link");
if(tmp == 0)
return "";
return tmp->value();
}
std::string rss::getDescription() const {
if(!ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("description");
if(tmp == 0)
return "";
return tmp->value();
}
std::string rss::getLanguage() const {
if(!ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("language");
if(tmp == 0)
return "";
return tmp->value();
}
std::string rss::getWebMaster() const {
if(!ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("webMaster");
if(tmp == 0)
return "";
return tmp->value();
}
std::string rss::getCopyright() const {
if(!ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("copyright");
if(tmp == 0)
return "";
return tmp->value();
}
std::string rss::getPubDate() const {
if(!ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("pubDate");
if(tmp == 0)
return "";
return tmp->value();
}
std::vector<std::map<std::string, std::string>> rss::getItems() const {
std::vector<std::map<std::string, std::string>> items;
if(!ok)
return items;
rapidxml::xml_node<> *first_item = item_node->first_node("item");
//there are no items, return empty vector
if(first_item == 0)
return items;
for(; first_item != NULL; first_item = first_item->next_sibling()){
std::map<std::string, std::string> tmp_item;
for(rapidxml::xml_node<> *item_val = first_item->first_node();
item_val != NULL; item_val = item_val->next_sibling())
tmp_item[item_val->name()] = item_val->value();
items.push_back(tmp_item);
}
return items;
}
bool rss::parse(const std::string& rss_str){
ok = true;
//parse, check for errors in xml doc
try {
doc.parse<0>(doc.allocate_string(rss_str.c_str()));
} catch (const std::runtime_error& e){
std::cerr << "rss::parse() runtime error: " << e.what() << std::endl;
ok = false;
} catch (const rapidxml::parse_error& e){
std::cerr << "rss::parse() error with file: " << e.what() << std::endl;
ok = false;
}
//verify that it is an rss file
rapidxml::xml_node<> *tmp = doc.first_node("rss");
if(tmp == NULL){
std::cerr << "rss::parse() error with file: XML is not an rss feed!" << std::endl;
ok = false;
} else {
tmp = tmp->first_node("channel");
if(tmp == NULL){
std::cerr << "rss::parse() error with file: RSS feed has no channels!"
<< std::endl;
ok = false;
}
}
if(ok){
item_node = tmp;
}
return ok;
}
// libcurl write callback: appends the received chunk to *s.
//   ptr   : start of the received data (not NUL-terminated)
//   size  : size of one element
//   nmemb : number of elements; the chunk is size * nmemb bytes long
//   s     : destination string, supplied through CURLOPT_WRITEDATA
// Returns the number of bytes consumed. Returning fewer bytes than were
// delivered makes libcurl abort the transfer with a write error.
size_t rss_utils::write_to_string(void* ptr, size_t size, size_t nmemb, std::string* s){
    const size_t chunk_bytes = size * nmemb;
    if(chunk_bytes == 0)
        return 0;   // nothing delivered; 0 == chunk_bytes, so this is success

    try {
        s->append(static_cast<const char *>(ptr), chunk_bytes);
    } catch (...) {
        // An exception (e.g. std::bad_alloc) must not unwind through libcurl's
        // C stack frames; report a short write so the transfer fails cleanly.
        return 0;
    }
    return chunk_bytes;
}

71
src/rss.hpp Normal file
View File

@ -0,0 +1,71 @@
///////////////////////////////////////////////////////////////////////////////
// Tyler Perkins
// 7-23-21
// rss definition
//
#pragma once
// NOTE(review): config.hpp is the header that defines _TESTS_, so this guard
// only includes it when an earlier include has already defined _TESTS_ as 1.
// Nothing else in this header reads _TESTS_; the guard looks unintended -
// confirm whether the include should be unconditional or removed.
#if _TESTS_ == 1
#include "config.hpp"
#endif
#include <string>
#include <vector>
#include <map>
#include <iostream>
#include <rapidxml/rapidxml.hpp>
#include <rapidxml/rapidxml_print.hpp>
#include <curl/curl.h>
namespace rss_utils {

// One map per <item> of a channel: child element name -> child element value.
using item_map = std::vector<std::map<std::string, std::string>>;

// A single RSS feed identified by a URI. update() downloads the document with
// libcurl and parses it with rapidxml; the getters then read values from the
// parsed <channel> element.
class rss {
public:
    // Empty feed: no URI, not ok.
    rss();
    // Feed for rss_uri; update() runs immediately unless perf_update is false.
    rss(const std::string& rss_uri, bool perf_update = true);
    rss(const char* rss_uri, bool perf_update = true);
    // Copies only the URI and then calls update() on the new object.
    rss(const rss& rhs);
    ~rss();

    // Takes over rhs's URI and then calls update().
    rss& operator=(const rss& rhs);

    // Copy created with new; the caller must delete it.
    rss* clone() const;

    // True once a document with an <rss> root and a <channel> child has been
    // parsed.
    bool isOk() const;

    // Replace the URI; update() runs immediately unless perf_update is false.
    void setURI(const std::string& rss_uri, bool perf_update = true);
    void setURI(const char* rss_uri, bool perf_update = true);
    std::string getURI() const;

    // Fetch and parse the feed at the current URI; returns whether it worked.
    bool update();

    // Channel-level values. Each returns "" when the feed is not ok or the
    // element is missing.
    std::string getTitle() const;
    std::string getLink() const;
    std::string getDescription() const;
    std::string getLanguage() const;
    std::string getWebMaster() const;
    std::string getCopyright() const;
    std::string getPubDate() const;

    // NOTE(review): the five getters below are declared here, but no
    // definition for them is visible in rss.cpp - confirm before calling.
    std::string getManagingEditor() const;
    std::string getGenerator() const;
    std::string getDocs() const;
    std::string getTTL() const;
    std::string getLastBuildDate() const;

    //TODO
    //std::string getImage() const;
    //std::string getCloud() const;

    // All <item> elements of the channel; empty when the feed is not ok.
    item_map getItems() const;

private:
    // Parses the downloaded text and locates the <channel> element.
    bool parse(const std::string& rss_str);

    bool ok;                            // result of the last parse/update
    std::string uri;                    // location of the feed
    rapidxml::xml_node<> *item_node;    // <channel> node inside doc, or null
    rapidxml::xml_document<> doc;       // parsed document that owns the nodes
};

// libcurl write callback that appends size * nmemb bytes from ptr to *s and
// returns the number of bytes consumed.
size_t write_to_string(void* ptr, size_t size, size_t nmemb, std::string* s);

}

83
src/tests.cpp Normal file
View File

@ -0,0 +1,83 @@
///////////////////////////////////////////////////////////////////////////////
// Tyler Perkins
// 7-23-21
// tests implementation
//
#include "tests.hpp"
using namespace rss_utils;
void rss_utils::testRSS() {
std::cout << "===== test RSS class =====" << std::endl;
std::string url1 = "https://www.feedforall.com/sample-feed.xml";
std::string bad_url = "https://www.not_a_real_link.com/";
//test default constructor
{
rss_utils::rss feed;
assert(feed.getURI() == "");
assert(feed.isOk() == false);
std::cout << "def cont tests passed!" << std::endl;
}
{
rss_utils::rss feed(url1);
assert(feed.getURI() == url1);
assert(feed.isOk() == true);
std::cout << "std::string& cont tests passed!" << std::endl;
}
{
rss_utils::rss feed(url1.c_str());
assert(feed.getURI() == url1);
assert(feed.isOk() == true);
std::cout << "char* cont tests passed!" << std::endl;
}
{
rss_utils::rss feed(url1);
rss_utils::rss feed2(feed);
assert(feed2.getURI() == url1);
assert(feed2.isOk() == true);
std::cout << "copy cont tests passed!" << std::endl;
}
{
rss_utils::rss feed(url1);
rss_utils::rss feed2 = feed;
assert(feed2.getURI() == url1);
assert(feed2.isOk() == true);
std::cout << "assignment operator tests passed!" << std::endl;
}
{
rss_utils::rss feed(url1);
rss_utils::rss* feed2 = feed.clone();
assert(feed2->getURI() == url1);
assert(feed2->isOk() == true);
std::cout << "clone functions tests passed!" << std::endl;
}
{
rss_utils::rss feed(url1);
feed.setURI(bad_url);
assert(feed.getURI() == bad_url);
assert(feed.isOk() == false);
std::cout << "std::string& setURI tests passed!" << std::endl;
}
{
rss_utils::rss feed;
feed.setURI(bad_url.c_str());
assert(feed.getURI() == bad_url);
assert(feed.isOk() == false);
std::cout << "char* setURI tests passed!" << std::endl;
}
{
rss_utils::rss feed(url1);
rss_utils::item_map items = feed.getItems();
for(auto it = items.begin(); it != items.end(); ++it){
std::cout << "=== Item ===" << std::endl;
std::cout << "Item: " << (*it)["title"] << std::endl;
std::cout << "Description: " << (*it)["description"] << std::endl;
std::cout << "Link: " << (*it)["link"] << std::endl;
std::cout << "pubDate: " << (*it)["pubDate"] << std::endl;
}
std::cout << "=== Done with Items ===" << std::endl;
}
std::cout << "===== test RSS class =====" << std::endl;
}

18
src/tests.hpp Normal file
View File

@ -0,0 +1,18 @@
///////////////////////////////////////////////////////////////////////////////
// Tyler Perkins
// 7-23-21
// tests definition
//
#pragma once
#include <cassert>
#include <string>
#include <iostream>
#include "rss.hpp"
namespace rss_utils {
// Runs the checks for the rss class (defined in tests.cpp). main() calls this
// when _TESTS_ is 1.
void testRSS();
}