From afca68c0018ffc7ff967d264a4df06fcc1085023 Mon Sep 17 00:00:00 2001 From: cr-marcstevens Date: Fri, 18 Sep 2020 14:11:32 +0200 Subject: [PATCH] Implementation changed to adjust to new DBLP URL scheme --- configure.ac | 2 +- src/bib_get.hpp | 3 ++- src/bib_search.hpp | 20 ++++++++++++-------- src/core.hpp | 2 +- version | 2 +- 5 files changed, 17 insertions(+), 12 deletions(-) diff --git a/configure.ac b/configure.ac index 2dc680a..5588369 100644 --- a/configure.ac +++ b/configure.ac @@ -1,6 +1,6 @@ AC_PREREQ([2.68]) # Important: DBLPBIBTEX version needs to match with: ./version src/core.hpp -AC_INIT([dblpbibtex], [2.1], [marc@marc-stevens.nl],[dblpbibtex],[https://github.com/cr-marcstevens/dblpbibtex]) +AC_INIT([dblpbibtex], [2.3], [marc@marc-stevens.nl],[dblpbibtex],[https://github.com/cr-marcstevens/dblpbibtex]) AC_CONFIG_SRCDIR([README.md]) AC_CONFIG_HEADERS([src/config.h]) AC_CONFIG_AUX_DIR([build-aux]) diff --git a/src/bib_get.hpp b/src/bib_get.hpp index 990e635..15c9f7a 100644 --- a/src/bib_get.hpp +++ b/src/bib_get.hpp @@ -16,7 +16,8 @@ namespace sa = string_algo; /*** download citations ***/ bool download_dblp_citation(const std::string& key, bool prepend = true) { - auto hdr_html = url_get("https://dblp.org/rec/bib" + std::to_string(params.dblpformat) + "/" + key.substr(5)); + std::string url = "https://dblp.org/rec/" + key.substr(5) + ".bib?param=" + std::to_string(params.dblpformat); + auto hdr_html = url_get(url); auto& html = hdr_html.second; if (html.empty()) return false; diff --git a/src/bib_search.hpp b/src/bib_search.hpp index 5ce6206..7e910e6 100644 --- a/src/bib_search.hpp +++ b/src/bib_search.hpp @@ -50,20 +50,24 @@ bool search_citation_dblp(const std::string& citkey) std::cout << "Search phrase too short <5 chars: '" << searchphrase << "'" << std::endl; return false; } - auto p_header_body = url_get("https://dblp.org/search/?q=" + searchphrase); + auto p_header_body = url_get("https://dblp.org/search?q=" + searchphrase); auto& html = p_header_body.second; html.erase(html.begin(), sa::ifind(html," cits2; - while (html.find("/rec/bibtex/") != std::string::npos) + while (html.find("/rec/") != std::string::npos) { - std::string cit = html.substr(html.find("/rec/bibtex/")); + std::string cit = html.substr(html.find("/rec/")); cit.erase(sa::to_lower_copy(cit).find_first_not_of("abcdefghijklmnopqrstuvwxyz0123456789:/.+-$&@=!?")); - cit.erase(0, std::string("/rec/bibtex/").length()); - cit = "DBLP:" + cit; - cits2.insert(cit); - html.erase(0, html.find("/rec/bibtex/")+1); - std::cout << "Found citations: " << cit << std::endl; + cit.erase(0, std::string("/rec/").length()); + if (cit.substr(cit.size()-5) == ".html") + { + cit.resize(cit.size()-5); + cit = "DBLP:" + cit; + cits2.insert(cit); + std::cout << "Found citations: " << cit << std::endl; + } + html.erase(0, html.find("/rec/")+1); } std::vector cits(cits2.begin(), cits2.end()); if (cits.size() == 0) diff --git a/src/core.hpp b/src/core.hpp index 454d561..2a14d29 100644 --- a/src/core.hpp +++ b/src/core.hpp @@ -18,7 +18,7 @@ //#define USE_CURL_FORM // use for old versions of curl that doesn't have curl_mime yet -#define VERSION "2.2" +#define VERSION "2.3" #include namespace sa = string_algo; diff --git a/version b/version index dff0958..9dfd992 100644 --- a/version +++ b/version @@ -1 +1 @@ -VERSION:2.2 \ No newline at end of file +VERSION:2.3 \ No newline at end of file