Skip to content

Commit 587556c

Browse files
committed
close #44 - v1.2.1 is out
1 parent 8a3c5df commit 587556c

File tree

7 files changed

+39
-33
lines changed

7 files changed

+39
-33
lines changed

PyPaperBot/Crossref.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -42,41 +42,41 @@ def getPapersInfo(papers, scholar_search_link, restrict, scholar_results):
4242
papers_return = []
4343
num = 1
4444
for paper in papers:
45-
while num <= scholar_results:
46-
title = paper['title']
47-
queries = {'query.bibliographic': title.lower(),'sort':'relevance',"select":"DOI,title,deposited,author,short-container-title"}
45+
#while num <= scholar_results:
46+
title = paper['title']
47+
queries = {'query.bibliographic': title.lower(),'sort':'relevance',"select":"DOI,title,deposited,author,short-container-title"}
4848

49-
print("Searching paper {} of {} on Crossref...".format(num,scholar_results))
50-
num += 1
49+
print("Searching paper {} of {} on Crossref...".format(num,len(papers)))
50+
num += 1
5151

52-
found_timestamp = 0
53-
paper_found = Paper(title,paper['link'],scholar_search_link, paper['cites'], paper['link_pdf'], paper['year'], paper['authors'])
54-
while True:
55-
try:
56-
for el in iterate_publications_as_json(max_results=30, queries=queries):
52+
found_timestamp = 0
53+
paper_found = Paper(title,paper['link'],scholar_search_link, paper['cites'], paper['link_pdf'], paper['year'], paper['authors'])
54+
while True:
55+
try:
56+
for el in iterate_publications_as_json(max_results=30, queries=queries):
5757

58-
el_date = 0
59-
if "deposited" in el and "timestamp" in el["deposited"]:
60-
el_date = int(el["deposited"]["timestamp"])
58+
el_date = 0
59+
if "deposited" in el and "timestamp" in el["deposited"]:
60+
el_date = int(el["deposited"]["timestamp"])
6161

62-
if (paper_found.DOI==None or el_date>found_timestamp) and "title" in el and similarStrings(title.lower() ,el["title"][0].lower())>0.75:
63-
found_timestamp = el_date
62+
if (paper_found.DOI==None or el_date>found_timestamp) and "title" in el and similarStrings(title.lower() ,el["title"][0].lower())>0.75:
63+
found_timestamp = el_date
6464

65-
if "DOI" in el:
66-
paper_found.DOI = el["DOI"].strip().lower()
67-
if "short-container-title" in el and len(el["short-container-title"])>0:
68-
paper_found.jurnal = el["short-container-title"][0]
65+
if "DOI" in el:
66+
paper_found.DOI = el["DOI"].strip().lower()
67+
if "short-container-title" in el and len(el["short-container-title"])>0:
68+
paper_found.jurnal = el["short-container-title"][0]
6969

70-
if restrict==None or restrict!=1:
71-
paper_found.setBibtex(getBibtex(paper_found.DOI))
70+
if restrict==None or restrict!=1:
71+
paper_found.setBibtex(getBibtex(paper_found.DOI))
7272

73-
break
74-
except ConnectionError as e:
75-
print("Wait 10 seconds and try again...")
76-
time.sleep(10)
73+
break
74+
except ConnectionError as e:
75+
print("Wait 10 seconds and try again...")
76+
time.sleep(10)
7777

78-
papers_return.append(paper_found)
78+
papers_return.append(paper_found)
7979

80-
time.sleep(random.randint(1,10))
80+
time.sleep(random.randint(1,10))
8181

8282
return papers_return

PyPaperBot/Downloader.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def saveFile(file_name,content, paper,dwn_source):
4343
paper.downloaded = True
4444
paper.downloadedFrom = dwn_source
4545

46-
def downloadPapers(papers, dwnl_dir, num_limit, scholar_results, SciHub_URL=None):
46+
def downloadPapers(papers, dwnl_dir, num_limit, SciHub_URL=None):
4747
def URLjoin(*args):
4848
return "/".join(map(lambda x: str(x).rstrip('/'), args))
4949

@@ -56,7 +56,7 @@ def URLjoin(*args):
5656
paper_files = []
5757
for p in papers:
5858
if p.canBeDownloaded() and (num_limit==None or num_downloaded<num_limit):
59-
print("Download {} of {} -> {}".format(paper_number, scholar_results, p.title))
59+
print("Download {} of {} -> {}".format(paper_number, len(papers), p.title))
6060
paper_number += 1
6161

6262
pdf_dir = getSaveDir(dwnl_dir, p.getFileName())

PyPaperBot/Scholar.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ def scholar_requests(scholar_pages, url, restrict, scholar_results=10):
3434
break
3535

3636
papers = schoolarParser(html)
37+
if len(papers)>scholar_results:
38+
papers = papers[0:scholar_results]
39+
3740
print("\nGoogle Scholar page {} : {} papers found".format(i,scholar_results))
3841

3942
if(len(papers)>0):

PyPaperBot/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__= "1.2"
1+
__version__= "1.2.1"

PyPaperBot/__main__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def main():
6666
parser.add_argument('--journal-filter', default=None, type=str ,help='CSV file path of the journal filter (More info on github)')
6767
parser.add_argument('--restrict', default=None, type=int ,choices=[0,1], help='0:Download only Bibtex - 1:Down load only papers PDF')
6868
parser.add_argument('--scihub-mirror', default=None, type=str, help='Mirror for downloading papers from sci-hub. If not set, it is selected automatically')
69-
parser.add_argument('--scholar-results', default=10, type=int, choices=[1,2,3,4,5,6,7,8,9,10], help='Downloads the first x results in a scholar page(max=10)')
69+
parser.add_argument('--scholar-results', default=10, type=int, choices=[1,2,3,4,5,6,7,8,9,10], help='Downloads the first x results for each scholar page(default/max=10)')
7070
parser.add_argument('--proxy', nargs='+', default=[], help='Use proxychains, provide a seperated list of proxies to use.Please specify the argument al the end')
7171
args = parser.parse_args()
7272

@@ -146,3 +146,4 @@ def main():
146146

147147
if __name__ == "__main__":
148148
main()
149+
print("""Work completed!\nIf you like this project, you can offer me a cup of coffee at --> https://www.paypal.com/paypalme/ferru97 <-- :)\n""")

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ Use `pip` to install from pypi:
2424
pip install PyPaperBot
2525
```
2626

27+
If on Windows you get an error saying *error: Microsoft Visual C++ 14.0 is required...*, try installing [Microsoft C++ Build Tools](https://visualstudio.microsoft.com/it/visual-cpp-build-tools/) or [Visual Studio](https://visualstudio.microsoft.com/it/downloads/)
28+
2729
### For Termux users
2830

2931
Since numpy cannot be directly installed....

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,15 @@
66
setuptools.setup(
77
name = 'PyPaperBot',
88
packages = setuptools.find_packages(),
9-
version = '1.2',
9+
version = '1.2.1',
1010
license='MIT',
1111
description = 'PyPaperBot is a Python tool for downloading scientific papers using Google Scholar, Crossref, and SciHub.',
1212
long_description=long_description,
1313
long_description_content_type="text/markdown",
1414
author = 'Vito Ferrulli',
1515
author_email = 'vitof970@gmail.com',
1616
url = 'https://github.com/ferru97/PyPaperBot',
17-
download_url = 'https://github.com/ferru97/PyPaperBot/archive/v1.2.tar.gz',
17+
download_url = 'https://github.com/ferru97/PyPaperBot/archive/v1.2.1.tar.gz',
1818
keywords = ['download-papers','google-scholar', 'scihub', 'scholar', 'crossref', 'papers'],
1919
install_requires=[
2020
'astroid>=2.4.2,<=2.5',

0 commit comments

Comments
 (0)