Add Items parsing

This commit is contained in:
Clortox 2021-07-25 13:40:52 -04:00
parent 9835fe7c1f
commit 1c5051b452
8 changed files with 301 additions and 73 deletions

View File

@ -10,4 +10,4 @@
* Set to 1 to compile and run the test suite
*/
#define _TESTS_ 0
#define _TESTS_ 1

View File

@ -20,6 +20,7 @@
int main(int argc, char** argv) {
#if _TESTS_ == 1
rss_utils::testRSS();
rss_utils::testItems();
return 0;
#endif

View File

@ -9,43 +9,43 @@
using namespace rss_utils;
// Default constructor: empty URI, not-ok state, and no channel node.
// NOTE(review): diff view — the uri/ok/item_node lines appear to be the
// pre-rename forms of the _uri/_ok/_item_node lines that follow them.
rss::rss() {
uri = "";
ok = false;
item_node = nullptr;
_uri = "";
_ok = false;
_item_node = nullptr;
}
// Constructs a feed for `rss_uri`, starting not-ok with no channel node.
// When `perf_update` is true, update() is called before the constructor returns.
// NOTE(review): diff view — the uri/ok/item_node lines appear to be the
// pre-rename forms of the _uri/_ok/_item_node lines that follow them.
rss::rss(const std::string& rss_uri, bool perf_update){
uri = rss_uri;
item_node = nullptr;
ok = false;
_uri = rss_uri;
_item_node = nullptr;
_ok = false;
if(perf_update)
update();
}
// C-string overload: copies `rss_uri` into a std::string, starts not-ok with no
// channel node, and calls update() when `perf_update` is true.
// NOTE(review): diff view — the uri/ok/item_node lines appear to be the
// pre-rename forms of the _uri/_ok/_item_node lines that follow them.
rss::rss(const char* rss_uri, bool perf_update){
uri = std::string(rss_uri);
ok = false;
item_node = nullptr;
_uri = std::string(rss_uri);
_ok = false;
_item_node = nullptr;
if(perf_update)
update();
}
// Copy constructor: copies only the URI from `rhs`, resets the ok flag and the
// channel node, then always calls update() to rebuild this object's own state.
// NOTE(review): diff view — the uri/ok/item_node lines appear to be the
// pre-rename forms of the _uri/_ok/_item_node lines that follow them.
rss::rss(const rss& rhs){
uri = rhs.uri;
ok = false;
item_node = nullptr;
_uri = rhs._uri;
_ok = false;
_item_node = nullptr;
update();
}
// Destructor: clears the XML document held by this object.
// NOTE(review): diff view — doc.clear() appears to be the pre-rename form of
// the _doc.clear() line that follows it.
rss::~rss(){
doc.clear();
_doc.clear();
}
rss& rss::operator=(const rss& rhs){
if(this != &rhs){
uri = rhs.uri;
ok = false;
item_node = nullptr;
_uri = rhs._uri;
_ok = false;
_item_node = nullptr;
update();
}
return *this;
@ -56,23 +56,23 @@ rss* rss::clone() const {
return clone;
}
// Returns the current value of the ok flag.
// NOTE(review): diff view — the first line appears to be the pre-rename form
// (ok) of the second (_ok).
bool rss::isOk() const { return ok; }
bool rss::isOk() const { return _ok; }
// Replaces the feed URI and marks the feed not-ok.
// When `perf_update` is true, update() is called right away.
// NOTE(review): diff view — the uri/ok lines appear to be the pre-rename forms
// of the _uri/_ok lines that follow them.
void rss::setURI(const std::string& rss_uri, bool perf_update) {
uri = rss_uri;
ok = false;
_uri = rss_uri;
_ok = false;
if(perf_update)
update();
}
// C-string overload: copies `rss_uri` into the stored URI and marks the feed
// not-ok. When `perf_update` is true, update() is called right away.
// NOTE(review): diff view — the uri/ok lines appear to be the pre-rename forms
// of the _uri/_ok lines that follow them.
void rss::setURI(const char* rss_uri, bool perf_update) {
uri = std::string(rss_uri);
ok = false;
_uri = std::string(rss_uri);
_ok = false;
if(perf_update)
update();
}
// Returns a copy of the stored feed URI.
// NOTE(review): diff view — the first line appears to be the pre-rename form
// (uri) of the second (_uri).
std::string rss::getURI() const { return uri; }
std::string rss::getURI() const { return _uri; }
bool rss::update() {
std::string result;
@ -82,7 +82,7 @@ bool rss::update() {
curl = curl_easy_init();
if(curl){
curl_easy_setopt(curl, CURLOPT_URL, uri.c_str());
curl_easy_setopt(curl, CURLOPT_URL, _uri.c_str());
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_utils::write_to_string);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &result);
res = curl_easy_perform(curl);
@ -95,75 +95,75 @@ bool rss::update() {
<< curl_easy_strerror(res) << std::endl;
}
}
ok = false;
_ok = false;
return false;
}
// Returns the value of the channel's <title> element.
// Returns "" when the feed is not ok or the channel has no <title> child.
// NOTE(review): diff view — each ok/item_node line appears to be the
// pre-rename form of the _ok/_item_node line after it.
std::string rss::getTitle() const {
if(!ok)
if(!_ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("title");
rapidxml::xml_node<> *tmp = _item_node->first_node("title");
if(tmp == 0)
return "";
return tmp->value();
}
// Returns the value of the channel's <link> element.
// Returns "" when the feed is not ok or the channel has no <link> child.
// NOTE(review): diff view — each ok/item_node line appears to be the
// pre-rename form of the _ok/_item_node line after it.
std::string rss::getLink() const {
if(!ok)
if(!_ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("link");
rapidxml::xml_node<> *tmp = _item_node->first_node("link");
if(tmp == 0)
return "";
return tmp->value();
}
// Returns the value of the channel's <description> element.
// Returns "" when the feed is not ok or the channel has no <description> child.
// NOTE(review): diff view — each ok/item_node line appears to be the
// pre-rename form of the _ok/_item_node line after it.
std::string rss::getDescription() const {
if(!ok)
if(!_ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("description");
rapidxml::xml_node<> *tmp = _item_node->first_node("description");
if(tmp == 0)
return "";
return tmp->value();
}
// Returns the value of the channel's <language> element.
// Returns "" when the feed is not ok or the channel has no <language> child.
// NOTE(review): diff view — each ok/item_node line appears to be the
// pre-rename form of the _ok/_item_node line after it.
std::string rss::getLanguage() const {
if(!ok)
if(!_ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("language");
rapidxml::xml_node<> *tmp = _item_node->first_node("language");
if(tmp == 0)
return "";
return tmp->value();
}
// Returns the value of the channel's <webMaster> element.
// Returns "" when the feed is not ok or the channel has no <webMaster> child.
// NOTE(review): diff view — each ok/item_node line appears to be the
// pre-rename form of the _ok/_item_node line after it.
std::string rss::getWebMaster() const {
if(!ok)
if(!_ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("webMaster");
rapidxml::xml_node<> *tmp = _item_node->first_node("webMaster");
if(tmp == 0)
return "";
return tmp->value();
}
// Returns the value of the channel's <copyright> element.
// Returns "" when the feed is not ok or the channel has no <copyright> child.
// NOTE(review): diff view — each ok/item_node line appears to be the
// pre-rename form of the _ok/_item_node line after it.
std::string rss::getCopyright() const {
if(!ok)
if(!_ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("copyright");
rapidxml::xml_node<> *tmp = _item_node->first_node("copyright");
if(tmp == 0)
return "";
return tmp->value();
}
std::string rss::getPubDate() const {
if(!ok)
if(!_ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("pubDate");
rapidxml::xml_node<> *tmp = _item_node->first_node("pubDate");
if(tmp == 0)
return "";
return tmp->value();
@ -171,40 +171,40 @@ std::string rss::getPubDate() const {
}
// Returns the value of the channel's <managingEditor> element.
// Returns "" when the feed is not ok or the channel has no such child.
// NOTE(review): diff view — each ok/item_node line appears to be the
// pre-rename form of the _ok/_item_node line after it.
std::string rss::getManagingEditor() const {
if(!ok)
if(!_ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("managingEditor");
rapidxml::xml_node<> *tmp = _item_node->first_node("managingEditor");
if(tmp == 0)
return "";
return tmp->value();
}
// Returns the value of the channel's <generator> element.
// Returns "" when the feed is not ok or the channel has no <generator> child.
// NOTE(review): diff view — each ok/item_node line appears to be the
// pre-rename form of the _ok/_item_node line after it.
std::string rss::getGenerator() const {
if(!ok)
if(!_ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("generator");
rapidxml::xml_node<> *tmp = _item_node->first_node("generator");
if(tmp == 0)
return "";
return tmp->value();
}
// Returns the value of the channel's <docs> element.
// Returns "" when the feed is not ok or the channel has no <docs> child.
// NOTE(review): diff view — each ok/item_node line appears to be the
// pre-rename form of the _ok/_item_node line after it.
std::string rss::getDocs() const {
if(!ok)
if(!_ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("docs");
rapidxml::xml_node<> *tmp = _item_node->first_node("docs");
if(tmp == 0)
return "";
return tmp->value();
}
std::string rss::getTTL() const {
if(!ok)
if(!_ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("ttl");
rapidxml::xml_node<> *tmp = _item_node->first_node("ttl");
if(tmp == 0)
return "";
return tmp->value();
@ -212,26 +212,72 @@ std::string rss::getTTL() const {
}
// Returns the value of the channel's <lastBuildDate> element.
// Returns "" when the feed is not ok or the channel has no such child.
// NOTE(review): diff view — each ok/item_node line appears to be the
// pre-rename form of the _ok/_item_node line after it.
std::string rss::getLastBuildDate() const {
if(!ok)
if(!_ok)
return "";
rapidxml::xml_node<> *tmp = item_node->first_node("lastBuildDate");
rapidxml::xml_node<> *tmp = _item_node->first_node("lastBuildDate");
if(tmp == 0)
return "";
return tmp->value();
}
std::vector<std::map<std::string, std::string>> rss::getItems() const {
std::vector<std::map<std::string, std::string>> items;
// Returns -1 when the feed is not ok, otherwise the number of cached items.
// NOTE(review): in the code visible here, _items is only filled by getItems(),
// so this presumably reports 0 until getItems()/getItem() has run — confirm.
// NOTE(review): diff view — the `if(!ok)` line appears to be residue of the
// removed getItems() body, not part of this function.
int rss::getItemCount() const {
if(!_ok)
return -1;
if(!ok)
return _items.size();
}
std::vector<item> rss::getItems() {
if(!_ok)
return std::vector<item>();
if(!_items.empty())
return _items;
//items is empty, have not processed the items yet
std::vector<rapidxml::xml_node<>*> items;
items = parseItems();
for(auto it = items.begin(); it != items.end(); ++it){
item i(*it);
_items.push_back(i);
}
return _items;
}
// Returns a mutable reference to the cached item at `index`.
// getItems() is called first so the cache is filled on first access.
// Throws std::out_of_range when `index` is negative or not below the number
// of cached items (a negative int converts to a huge unsigned value, which
// at() rejects), instead of indexing out of bounds.
item& rss::getItem(const int index) {
    getItems();
    return _items.at(static_cast<std::vector<item>::size_type>(index));
}
// Returns a read-only reference to the cached item at `index`.
// This overload only reads _items; it does not fill the cache itself.
// Throws std::out_of_range when `index` is negative or not below the number
// of cached items (including when nothing has been cached yet), instead of
// indexing out of bounds.
const item& rss::getItem(const int index) const {
    return _items.at(static_cast<std::vector<item>::size_type>(index));
}
// Subscript access: forwards to the non-const getItem(index).
item& rss::operator[](const int index) {
    return this->getItem(index);
}
// Read-only subscript access: forwards to the const getItem(index).
const item& rss::operator[](const int index) const {
    return this->getItem(index);
}
// Collects raw node pointers starting at the channel's first <item> child.
// Returns an empty vector when the feed is not ok or no <item> child exists.
// NOTE(review): next_sibling() is called without a name, so presumably any
// non-<item> sibling that follows the first <item> is collected too — confirm.
// NOTE(review): diff view — the item_node line appears to be the pre-rename
// form of the _item_node line after it; the commented-out region below is the
// previous map-based implementation and contains a diff hunk header.
std::vector<rapidxml::xml_node<>*> rss::parseItems() {
std::vector<rapidxml::xml_node<>*> items;
if(!_ok)
return items;
rapidxml::xml_node<> *first_item = item_node->first_node("item");
rapidxml::xml_node<> *first_item = _item_node->first_node("item");
//there are no items, return empty vector
if(first_item == 0)
return items;
// Walk the sibling chain, storing each node pointer.
for(; first_item != NULL; first_item = first_item->next_sibling())
items.push_back(first_item);
/*
for(; first_item != NULL; first_item = first_item->next_sibling()){
std::map<std::string, std::string> tmp_item;
for(rapidxml::xml_node<> *item_val = first_item->first_node();
@ -239,45 +285,142 @@ std::vector<std::map<std::string, std::string>> rss::getItems() const {
tmp_item[item_val->name()] = item_val->value();
items.push_back(tmp_item);
}
*/
return items;
}
// Parses `rss_str` as XML and locates the rss > channel node.
// Returns the ok flag: false when parsing throws, when there is no root <rss>
// node, or when <rss> has no <channel> child. On success the channel node is
// stored as the item node used by the channel getters.
// NOTE(review): diff view — each ok/doc/item_node line appears to be the
// pre-rename form of the _ok/_doc/_item_node line after it.
bool rss::parse(const std::string& rss_str){
ok = true;
_ok = true;
//parse, check for errors in xml doc
try {
// The text is passed through the document's allocate_string() before parsing.
doc.parse<0>(doc.allocate_string(rss_str.c_str()));
_doc.parse<0>(_doc.allocate_string(rss_str.c_str()));
} catch (const std::runtime_error& e){
std::cerr << "rss::parse() runtime error: " << e.what() << std::endl;
ok = false;
_ok = false;
} catch (const rapidxml::parse_error& e){
std::cerr << "rss::parse() error with file: " << e.what() << std::endl;
ok = false;
_ok = false;
}
// NOTE(review): the lookups below still run after a caught parse error —
// confirm first_node() is safe on a partially parsed document.
//verify that it is an rss file
rapidxml::xml_node<> *tmp = doc.first_node("rss");
rapidxml::xml_node<> *tmp = _doc.first_node("rss");
if(tmp == NULL){
std::cerr << "rss::parse() error with file: XML is not an rss feed!" << std::endl;
ok = false;
_ok = false;
} else {
tmp = tmp->first_node("channel");
if(tmp == NULL){
std::cerr << "rss::parse() error with file: RSS feed has no channels!"
<< std::endl;
ok = false;
_ok = false;
}
}
// Only publish the channel node when every check above passed.
if(ok){
item_node = tmp;
if(_ok){
_item_node = tmp;
}
return ok;
return _ok;
}
// Write callback installed via CURLOPT_WRITEFUNCTION in rss::update().
// Appends the `size * nmemb` bytes at `ptr` to the string `s` and returns
// that byte count.
size_t rss_utils::write_to_string(void* ptr, size_t size, size_t nmemb, std::string* s){
    const size_t byte_count = size * nmemb;
    const char* chunk = static_cast<char *>(ptr);
    s->append(chunk, byte_count);
    return byte_count;
}
// Default-constructs an item that wraps no XML node.
// _item is set to nullptr so it never holds an indeterminate pointer value.
item::item() : _item(nullptr) {}
// Wraps the given XML node pointer; the pointer is stored as-is.
item::item(rapidxml::xml_node<>* node)
    : _item(node) {}
// Copy constructor: the new item refers to the same node pointer as `rhs`.
item::item(const item& rhs)
    : _item(rhs._item) {}
// Destructor: the body is empty, so nothing is released here.
item::~item() {
}
// Copy assignment: makes this item refer to the same node pointer as `rhs`.
item& item::operator=(const item& rhs){
    // Assigning a pointer onto itself leaves it unchanged, so self-assignment
    // needs no separate branch.
    _item = rhs._item;
    return *this;
}
// Returns a copy of this item allocated with `new`.
// The caller receives the raw pointer and is responsible for deleting it.
item* item::clone() const {
    return new item(*this);
}
std::string item::getTitle() const {
rapidxml::xml_node<>* tmp = _item->first_node("title");
if(tmp == 0)
return "";
return tmp->value();
}
std::string item::getLink() const {
rapidxml::xml_node<>* tmp = _item->first_node("link");
if(tmp == 0)
return "";
return tmp->value();
}
std::string item::getDescription() const {
rapidxml::xml_node<>* tmp = _item->first_node("description");
if(tmp == 0)
return "";
return tmp->value();
}
std::string item::getAuthor() const {
rapidxml::xml_node<>* tmp = _item->first_node("author");
if(tmp == 0)
return "";
return tmp->value();
}
std::string item::getCategory() const {
rapidxml::xml_node<>* tmp = _item->first_node("category");
if(tmp == 0)
return "";
return tmp->value();
}
std::string item::getComments() const {
rapidxml::xml_node<>* tmp = _item->first_node("comments");
if(tmp == 0)
return "";
return tmp->value();
}
/*
std::string item::getEnclosure() const {
rapidxml::xml_node<>* tmp = _item->first_node("enclosure");
if(tmp == 0)
return "";
return tmp->value();
}
*/
std::string item::getGuid() const {
rapidxml::xml_node<>* tmp = _item->first_node("guid");
if(tmp == 0)
return "";
return tmp->value();
}
std::string item::getPubDate() const {
rapidxml::xml_node<>* tmp = _item->first_node("pubDate");
if(tmp == 0)
return "";
return tmp->value();
}
std::string item::getSource() const {
rapidxml::xml_node<>* tmp = _item->first_node("source");
if(tmp == 0)
return "";
return tmp->value();
}

View File

@ -14,6 +14,8 @@
#include <vector>
#include <map>
#include <iostream>
#include <stdexcept>
#include "tylers_utils.hpp"
#include <rapidxml/rapidxml.hpp>
#include <rapidxml/rapidxml_print.hpp>
@ -21,6 +23,7 @@
namespace rss_utils {
typedef std::vector<std::map<std::string, std::string>> item_map;
class item;
class rss {
public:
rss();
@ -51,20 +54,53 @@ namespace rss_utils {
std::string getDocs() const;
std::string getTTL() const;
std::string getLastBuildDate() const;
int getItemCount() const;
std::vector<item> getItems();
item& getItem(const int);
const item& getItem(const int) const;
item& operator[](const int);
const item& operator[](const int) const;
//TODO
//std::string getImage() const;
//std::string getCloud() const;
std::vector<std::map<std::string, std::string>> getItems() const;
private:
bool parse(const std::string&);
std::vector<rapidxml::xml_node<>*> parseItems();
bool ok;
std::string uri;
rapidxml::xml_node<> *item_node;
rapidxml::xml_document<> doc;
bool _ok;
std::string _uri;
rapidxml::xml_node<> *_item_node;
rapidxml::xml_document<> _doc;
std::vector<item> _items;
};
// Accessor for a single feed entry: holds one rapidxml node pointer and
// exposes that node's child elements as strings.
class item {
public:
// Constructs an item without supplying a node.
item();
// Wraps the given node pointer.
item(rapidxml::xml_node<>*);
// Copies the node pointer held by the other item.
item(const item&);
~item();
// Assigns the node pointer held by the other item.
item& operator=(const item&);
// Returns a copy allocated with `new`; the caller deletes it.
item* clone() const;
// Each getter returns the value of the like-named child element, or "" when
// that child is absent.
std::string getTitle() const;
std::string getLink() const;
std::string getDescription() const;
std::string getAuthor() const;
std::string getCategory() const;
std::string getComments() const;
//std::string getEnclosure() const;
std::string getGuid() const;
std::string getPubDate() const;
std::string getSource() const;
private:
// Node this item reads from.
// NOTE(review): presumably owned by the rss object's xml_document — confirm.
rapidxml::xml_node<> *_item;
};
size_t write_to_string(void*, size_t, size_t, std::string*);

View File

@ -13,7 +13,6 @@
namespace rss_utils {
std::string rss_to_list(const rss&, const option_flags*);
//std::string rss_to_items(const rss&
}

View File

@ -67,6 +67,7 @@ void rss_utils::testRSS() {
assert(feed.isOk() == false);
std::cout << "char* setURI tests passed!" << std::endl;
}
/*
{
rss_utils::rss feed(url1);
rss_utils::item_map items = feed.getItems();
@ -79,5 +80,34 @@ void rss_utils::testRSS() {
}
std::cout << "=== Done with Items ===" << std::endl;
}
*/
std::cout << "===== test RSS class =====" << std::endl;
}
void rss_utils::testItems() {
std::cout << "===== test RSS Items =====" << std::endl;
std::string url1 = "https://www.feedforall.com/sample-feed.xml";
std::string bad_url = "https://www.not_a_real_link.com/";
{
rss_utils::rss feed(url1);
std::vector<rss_utils::item> items = feed.getItems();
//display items
std::cout << "===== Item =====" << std::endl;
for(auto it = items.begin(); it != items.end(); ++it){
std::cout << (*it).getTitle() << std::endl;
std::cout << (*it).getLink() << std::endl;
std::cout << (*it).getDescription() << std::endl;
std::cout << (*it).getAuthor() << std::endl;
std::cout << (*it).getCategory() << std::endl;
std::cout << (*it).getComments() << std::endl;
std::cout << (*it).getGuid() << std::endl;
std::cout << (*it).getPubDate() << std::endl;
std::cout << (*it).getSource() << std::endl;
std::cout << "===== Item =====" << std::endl;
}
}
std::cout << "===== test RSS Items =====" << std::endl;
}

View File

@ -13,6 +13,5 @@
namespace rss_utils {
void testRSS();
void testItems();
}

20
src/tylers_utils.hpp Normal file
View File

@ -0,0 +1,20 @@
///////////////////////////////////////////////////////////////////////////////
// Tyler Perkins
// 7-24-21
// Tyler's useful utilities for c++
//
namespace tylers_utilities{
    //constexpr hash function
    //
    // Polynomial rolling hash of a NUL-terminated string, usable in constant
    // expressions. Computes sum(byte[i] * 131^i) mod 4294967291, so the result
    // is always in [0, 4294967291).
    //
    // str: NUL-terminated string to hash; a null pointer hashes to 0, the same
    //      value as the empty string.
    // Returns the hash value.
    constexpr size_t hash(const char* str){
        const long long p = 131;
        const long long m = 4294967291; //2^32 - 5
        long long total = 0;
        long long current_multiplier = 1;
        if(str == nullptr)
            return 0;
        for(size_t i = 0; str[i] != '\0'; ++i){
            // Read each byte as unsigned: plain char may be signed, and a
            // negative value would drive `total` negative and make the
            // returned size_t wrap far outside [0, m).
            const long long byte = static_cast<unsigned char>(str[i]);
            total = (total + current_multiplier * byte) % m;
            current_multiplier = (current_multiplier * p) % m;
        }
        return static_cast<size_t>(total);
    }
}