From 36a33475b74f2465c091f8a5fb6af9211ca7c8c3 Mon Sep 17 00:00:00 2001
From: Pritam Kumar
Date: Thu, 12 Dec 2019 15:36:48 +0530
Subject: [PATCH] for python3

---
 geeksforgeeks-pdf.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/geeksforgeeks-pdf.py b/geeksforgeeks-pdf.py
index 93922f3..bf77a90 100644
--- a/geeksforgeeks-pdf.py
+++ b/geeksforgeeks-pdf.py
@@ -21,7 +21,7 @@
 
 #print to_crawl
 
-print len(to_crawl)
+print (len(to_crawl))
 count=0
 
 def get_page(page):
@@ -39,15 +39,15 @@ def save_as_pdf(s):
         html=get_page(s)
         client.convertHtml(html, output_file)
         output_file.close()
-    except pdfcrowd.Error,why:
-        print 'Failed:', why
+    except pdfcrowd.Error as why:
+        print ('Failed:', why)
 
 
 while len(to_crawl):
     b=to_crawl.pop()
     if b.find('http://www.geeksforgeeks.org')==0 and b not in crawled and b.find('forums')<0:
         count=count+1
-        print count
+        print (count)
         crawled.append(b)
         status, response = http.request(b)
         for link in BeautifulSoup(response, parse_only=SoupStrainer('a')):
@@ -64,13 +64,13 @@ def save_as_pdf(s):
 
 
 for st in crawled:
     if st.find('amazon')>=0 and st.find('#')<0 and st.find('tag')<0 and st.find('forum')<0:
-        print st
+        print (st)
         amazon.append(st)
 
 
-print "Finished"
-print len(amazon)
+print ("Finished")
+print (len(amazon))
 
 
 for page in amazon: