From adc6d41101547d2d20b64c38634e5159cd21cd20 Mon Sep 17 00:00:00 2001 From: Clortox Date: Sun, 25 Jul 2021 19:07:40 -0400 Subject: [PATCH] Fix CDATA not parsing --- src/main.cpp | 2 ++ src/options.cpp | 40 +++++++++++++++++++++++------ src/rss.cpp | 68 +++++++++++++++++++++++++++++++++---------------- src/rss.hpp | 4 +++ src/rss_out.cpp | 5 ++++ 5 files changed, 89 insertions(+), 30 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 08bd87e..acb67dd 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -43,6 +43,8 @@ int main(int argc, char** argv) { output += "\n" + rss_utils::rss_to_items(feed, opts); } + std::cout << "Item count: " << feed.getItemCount() << std::endl; + std::cout << output << std::endl; delete opts; diff --git a/src/options.cpp b/src/options.cpp index 6997946..c2bb7d1 100644 --- a/src/options.cpp +++ b/src/options.cpp @@ -93,13 +93,22 @@ option_flags* parse_options(int argc, char** argv) { current_item->description ^= 1; break; case 'e': - ret->language ^= 1; + if(current_item == nullptr) + ret->language ^= 1; + else + std::cerr << "-e option not understood in context of --item; ignoring" << std::endl; break; case 'm': - ret->webmaster ^= 1; + if(current_item == nullptr) + ret->webmaster ^= 1; + else + std::cerr << "-m option not understood in context of --item; ignoring" << std::endl; break; case 'c': - ret->copyright ^= 1; + if(current_item == nullptr) + ret->copyright ^= 1; + else + std::cerr << "-c option not understood in context of --item; ignoring" << std::endl; break; case 'p': if(current_item == nullptr) @@ -108,19 +117,34 @@ option_flags* parse_options(int argc, char** argv) { current_item->pubdate ^= 1; break; case 'q': - ret->managingeditor ^= 1; + if(current_item == nullptr) + ret->managingeditor ^= 1; + else + std::cerr << "-q option not understood in context of --item; ignoring" << std::endl; break; case 'g': - ret->generator ^= 1; + if(current_item == nullptr) + ret->generator ^= 1; + else + std::cerr << "-g option not understood in context of --item; ignoring" << std::endl; break; case 'o': - ret->docs ^= 1; + if(current_item == nullptr) + ret->docs ^= 1; + else + std::cerr << "-o option not understood in context of --item; ignoring" << std::endl; break; case 'w': - ret->ttl ^= 1; + if(current_item == nullptr) + ret->ttl ^= 1; + else + std::cerr << "-w option not understood in context of --item; ignoring" << std::endl; break; case 'b': - ret->builddate ^= 1; + if(current_item == nullptr) + ret->builddate ^= 1; + else + std::cerr << "-b option not understood in context of --item; ignoring" << std::endl; break; case 'i': if(ret->items == nullptr){ diff --git a/src/rss.cpp b/src/rss.cpp index 184639f..0ae8440 100644 --- a/src/rss.cpp +++ b/src/rss.cpp @@ -106,7 +106,7 @@ std::string rss::getTitle() const { rapidxml::xml_node<> *tmp = _item_node->first_node("title"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } std::string rss::getLink() const { @@ -116,7 +116,7 @@ std::string rss::getLink() const { rapidxml::xml_node<> *tmp = _item_node->first_node("link"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } std::string rss::getDescription() const { @@ -126,7 +126,7 @@ std::string rss::getDescription() const { rapidxml::xml_node<> *tmp = _item_node->first_node("description"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } std::string rss::getLanguage() const { @@ -136,7 +136,7 @@ std::string rss::getLanguage() const { rapidxml::xml_node<> *tmp = _item_node->first_node("language"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } std::string rss::getWebMaster() const { @@ -146,7 +146,7 @@ std::string rss::getWebMaster() const { rapidxml::xml_node<> *tmp = _item_node->first_node("webMaster"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } std::string rss::getCopyright() const { @@ -156,7 +156,7 @@ std::string rss::getCopyright() const { rapidxml::xml_node<> *tmp = _item_node->first_node("copyright"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } std::string rss::getPubDate() const { @@ -166,7 +166,7 @@ std::string rss::getPubDate() const { rapidxml::xml_node<> *tmp = _item_node->first_node("pubDate"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } @@ -177,7 +177,7 @@ std::string rss::getManagingEditor() const { rapidxml::xml_node<> *tmp = _item_node->first_node("managingEditor"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } std::string rss::getGenerator() const { @@ -187,7 +187,7 @@ std::string rss::getGenerator() const { rapidxml::xml_node<> *tmp = _item_node->first_node("generator"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } std::string rss::getDocs() const { @@ -197,7 +197,7 @@ std::string rss::getDocs() const { rapidxml::xml_node<> *tmp = _item_node->first_node("docs"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } std::string rss::getTTL() const { @@ -207,7 +207,7 @@ std::string rss::getTTL() const { rapidxml::xml_node<> *tmp = _item_node->first_node("ttl"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } @@ -218,7 +218,7 @@ std::string rss::getLastBuildDate() const { rapidxml::xml_node<> *tmp = _item_node->first_node("lastBuildDate"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } int rss::getItemCount() const { @@ -324,11 +324,35 @@ bool rss::parse(const std::string& rss_str){ return _ok; } +std::string rss::cdata_to_string(const rapidxml::xml_node<>* node) const{ + //this will dig till were past the cdata + std::cout << "in cdata" << std::endl; + const rapidxml::xml_node<>* tmp = node; + while(tmp->value()[0] == '\0'){ //if string is empty + tmp = tmp->first_node(); + if(tmp == NULL) + return ""; + } + return tmp->value(); +} + size_t rss_utils::write_to_string(void* ptr, size_t size, size_t nmemb, std::string* s){ s->append(static_cast(ptr), size * nmemb); return size * nmemb; } +std::string item::cdata_to_string(const rapidxml::xml_node<>* node) const{ + //this will dig till were past the cdata + std::cout << "in cdata" << std::endl; + const rapidxml::xml_node<>* tmp = node; + while(tmp->value()[0] == '\0'){ //if string is empty + tmp = tmp->first_node(); + if(tmp == NULL) + return ""; + } + return tmp->value(); +} + item::item() {} item::item(rapidxml::xml_node<>* node){ _item = node; @@ -356,42 +380,42 @@ std::string item::getTitle() const { rapidxml::xml_node<>* tmp = _item->first_node("title"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } std::string item::getLink() const { rapidxml::xml_node<>* tmp = _item->first_node("link"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } std::string item::getDescription() const { rapidxml::xml_node<>* tmp = _item->first_node("description"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } std::string item::getAuthor() const { rapidxml::xml_node<>* tmp = _item->first_node("author"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } std::string item::getCategory() const { rapidxml::xml_node<>* tmp = _item->first_node("category"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } std::string item::getComments() const { rapidxml::xml_node<>* tmp = _item->first_node("comments"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } /* @@ -399,7 +423,7 @@ std::string item::getEnclosure() const { rapidxml::xml_node<>* tmp = _item->first_node("enclosure"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } */ @@ -407,20 +431,20 @@ std::string item::getGuid() const { rapidxml::xml_node<>* tmp = _item->first_node("guid"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } std::string item::getPubDate() const { rapidxml::xml_node<>* tmp = _item->first_node("pubDate"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } std::string item::getSource() const { rapidxml::xml_node<>* tmp = _item->first_node("source"); if(tmp == 0) return ""; - return tmp->value(); + return cdata_to_string(tmp); } diff --git a/src/rss.hpp b/src/rss.hpp index 3daa9f4..ddcb6b2 100644 --- a/src/rss.hpp +++ b/src/rss.hpp @@ -11,6 +11,7 @@ #endif #include +#include #include #include #include @@ -69,6 +70,7 @@ namespace rss_utils { private: bool parse(const std::string&); std::vector*> parseItems(); + std::string cdata_to_string(const rapidxml::xml_node<>*) const; bool _ok; std::string _uri; @@ -99,6 +101,8 @@ namespace rss_utils { private: + std::string cdata_to_string(const rapidxml::xml_node<>*) const; + rapidxml::xml_node<> *_item; }; diff --git a/src/rss_out.cpp b/src/rss_out.cpp index 6278984..e88c4bb 100644 --- a/src/rss_out.cpp +++ b/src/rss_out.cpp @@ -49,6 +49,11 @@ std::string rss_utils::rss_to_items(const rss& rss_obj, const option_flags* flag int maxItem = rss_obj.getItemCount(); std::string ret; + if(maxItem == -1){ + ret = ""; + return ret; + } + for(unsigned int i=0; i < flags->item_count; ++i, ++items){ //if has a valid index if(items->index >= 0 && items->index < maxItem){