Skip to content

Commit 587556c

Browse files
committed
close #44 - v1.2.1 is out
1 parent 8a3c5df commit 587556c

File tree

7 files changed

+39
-33
lines changed

7 files changed

+39
-33
lines changed

PyPaperBot/Crossref.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -42,41 +42,41 @@ def getPapersInfo(papers, scholar_search_link, restrict, scholar_results):
4242
papers_return = []
4343
num = 1
4444
for paper in papers:
45-
while num <= scholar_results:
46-
title = paper['title']
47-
queries = {'query.bibliographic': title.lower(),'sort':'relevance',"select":"DOI,title,deposited,author,short-container-title"}
45+
#while num <= scholar_results:
46+
title = paper['title']
47+
queries = {'query.bibliographic': title.lower(),'sort':'relevance',"select":"DOI,title,deposited,author,short-container-title"}
4848

49-
print("Searching paper {} of {} on Crossref...".format(num,scholar_results))
50-
num += 1
49+
print("Searching paper {} of {} on Crossref...".format(num,len(papers)))
50+
num += 1
5151

52-
found_timestamp = 0
53-
paper_found = Paper(title,paper['link'],scholar_search_link, paper['cites'], paper['link_pdf'], paper['year'], paper['authors'])
54-
while True:
55-
try:
56-
for el in iterate_publications_as_json(max_results=30, queries=queries):
52+
found_timestamp = 0
53+
paper_found = Paper(title,paper['link'],scholar_search_link, paper['cites'], paper['link_pdf'], paper['year'], paper['authors'])
54+
while True:
55+
try:
56+
for el in iterate_publications_as_json(max_results=30, queries=queries):
5757

58-
el_date = 0
59-
if "deposited" in el and "timestamp" in el["deposited"]:
60-
el_date = int(el["deposited"]["timestamp"])
58+
el_date = 0
59+
if "deposited" in el and "timestamp" in el["deposited"]:
60+
el_date = int(el["deposited"]["timestamp"])
6161

62-
if (paper_found.DOI==None or el_date>found_timestamp) and "title" in el and similarStrings(title.lower() ,el["title"][0].lower())>0.75:
63-
found_timestamp = el_date
62+
if (paper_found.DOI==None or el_date>found_timestamp) and "title" in el and similarStrings(title.lower() ,el["title"][0].lower())>0.75:
63+
found_timestamp = el_date
6464

65-
if "DOI" in el:
66-
paper_found.DOI = el["DOI"].strip().lower()
67-
if "short-container-title" in el and len(el["short-container-title"])>0:
68-
paper_found.jurnal = el["short-container-title"][0]
65+
if "DOI" in el:
66+
paper_found.DOI = el["DOI"].strip().lower()
67+
if "short-container-title" in el and len(el["short-container-title"])>0:
68+
paper_found.jurnal = el["short-container-title"][0]
6969

70-
if restrict==None or restrict!=1:
71-
paper_found.setBibtex(getBibtex(paper_found.DOI))
70+
if restrict==None or restrict!=1:
71+
paper_found.setBibtex(getBibtex(paper_found.DOI))
7272

73-
break
74-
except ConnectionError as e:
75-
print("Wait 10 seconds and try again...")
76-
time.sleep(10)
73+
break
74+
except ConnectionError as e:
75+
print("Wait 10 seconds and try again...")
76+
time.sleep(10)
7777

78-
papers_return.append(paper_found)
78+
papers_return.append(paper_found)
7979

80-
time.sleep(random.randint(1,10))
80+
time.sleep(random.randint(1,10))
8181

8282
return papers_return

PyPaperBot/Downloader.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def saveFile(file_name,content, paper,dwn_source):
4343
paper.downloaded = True
4444
paper.downloadedFrom = dwn_source
4545

46-
def downloadPapers(papers, dwnl_dir, num_limit, scholar_results, SciHub_URL=None):
46+
def downloadPapers(papers, dwnl_dir, num_limit, SciHub_URL=None):
4747
def URLjoin(*args):
4848
return "/".join(map(lambda x: str(x).rstrip('/'), args))
4949

@@ -56,7 +56,7 @@ def URLjoin(*args):
5656
paper_files = []
5757
for p in papers:
5858
if p.canBeDownloaded() and (num_limit==None or num_downloaded<num_limit):
59-
print("Download {} of {} -> {}".format(paper_number, scholar_results, p.title))
59+
print("Download {} of {} -> {}".format(paper_number, len(papers), p.title))
6060
paper_number += 1
6161

6262
pdf_dir = getSaveDir(dwnl_dir, p.getFileName())

PyPaperBot/Scholar.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ def scholar_requests(scholar_pages, url, restrict, scholar_results=10):
3434
break
3535

3636
papers = schoolarParser(html)
37+
if len(papers)>scholar_results:
38+
papers = papers[0:scholar_results]
39+
3740
print("\nGoogle Scholar page {} : {} papers found".format(i,scholar_results))
3841

3942
if(len(papers)>0):

PyPaperBot/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__= "1.2"
1+
__version__= "1.2.1"

PyPaperBot/__main__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def main():
6666
parser.add_argument('--journal-filter', default=None, type=str ,help='CSV file path of the journal filter (More info on github)')
6767
parser.add_argument('--restrict', default=None, type=int ,choices=[0,1], help='0:Download only Bibtex - 1:Down load only papers PDF')
6868
parser.add_argument('--scihub-mirror', default=None, type=str, help='Mirror for downloading papers from sci-hub. If not set, it is selected automatically')
69-
parser.add_argument('--scholar-results', default=10, type=int, choices=[1,2,3,4,5,6,7,8,9,10], help='Downloads the first x results in a scholar page(max=10)')
69+
parser.add_argument('--scholar-results', default=10, type=int, choices=[1,2,3,4,5,6,7,8,9,10], help='Downloads the first x results for each scholar page(default/max=10)')
7070
parser.add_argument('--proxy', nargs='+', default=[], help='Use proxychains, provide a seperated list of proxies to use.Please specify the argument al the end')
7171
args = parser.parse_args()
7272

@@ -146,3 +146,4 @@ def main():
146146

147147
if __name__ == "__main__":
148148
main()
149+
print("""Work completed!\nIf you like this project, you can offer me a cup of coffee at --> https://www.paypal.com/paypalme/ferru97 <-- :)\n""")

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ Use `pip` to install from pypi:
2424
pip install PyPaperBot
2525
```
2626

27+
If on Windows you get an error saying *error: Microsoft Visual C++ 14.0 is required...*, try installing [Microsoft C++ Build Tools](https://visualstudio.microsoft.com/it/visual-cpp-build-tools/) or [Visual Studio](https://visualstudio.microsoft.com/it/downloads/)
28+
2729
### For Termux users
2830

2931
Since numpy cannot be directly installed....

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,15 @@
66
setuptools.setup(
77
name = 'PyPaperBot',
88
packages = setuptools.find_packages(),
9-
version = '1.2',
9+
version = '1.2.1',
1010
license='MIT',
1111
description = 'PyPaperBot is a Python tool for downloading scientific papers using Google Scholar, Crossref, and SciHub.',
1212
long_description=long_description,
1313
long_description_content_type="text/markdown",
1414
author = 'Vito Ferrulli',
1515
author_email = 'vitof970@gmail.com',
1616
url = 'https://github.com/ferru97/PyPaperBot',
17-
download_url = 'https://github.com/ferru97/PyPaperBot/archive/v1.2.tar.gz',
17+
download_url = 'https://github.com/ferru97/PyPaperBot/archive/v1.2.1.tar.gz',
1818
keywords = ['download-papers','google-scholar', 'scihub', 'scholar', 'crossref', 'papers'],
1919
install_requires=[
2020
'astroid>=2.4.2,<=2.5',

0 commit comments

Comments
 (0)