From 057d37fc8d381c051486b38e5f076b71944f6f27 Mon Sep 17 00:00:00 2001 From: AaravSinghRathor <42621083+AaravSinghRathor@users.noreply.github.com> Date: Fri, 2 Oct 2020 00:18:36 +0530 Subject: [PATCH 1/2] Updated the readme file --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2a7f4d4..101a982 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ geeksforgeeks-pdf ================= -This Python script aimed to download all Amazon Interview Experience from GeeksforGeeks website. You can modify this script to download as per your need. +This Python script is created with an objective to download all Amazon Interview Experience Questions from GeeksforGeeks website. You can modify this script to download as per your need. From 5cbe9a7105264c0e6a376ab9d157db507baa32ce Mon Sep 17 00:00:00 2001 From: AaravSinghRathor <42621083+AaravSinghRathor@users.noreply.github.com> Date: Fri, 2 Oct 2020 00:25:38 +0530 Subject: [PATCH 2/2] Added some Python 3 changes, docstrings and improved the code according to PEP 8 guidelines --- geeksforgeeks-pdf.py | 47 ++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/geeksforgeeks-pdf.py b/geeksforgeeks-pdf.py index 93922f3..8fb66ff 100644 --- a/geeksforgeeks-pdf.py +++ b/geeksforgeeks-pdf.py @@ -1,53 +1,52 @@ import httplib2 import pdfcrowd +import urllib2 from bs4 import BeautifulSoup, SoupStrainer http = httplib2.Http() -s= 'http://www.geeksforgeeks.org/' -i=0 -to_crawl=[] -to_crawl.append(s) +s = 'http://www.geeksforgeeks.org/' +i = 0 +to_crawl = [s] status, response = http.request(s) -crawled=[] -crawled.append(s) +crawled = [s] for link in BeautifulSoup(response, parse_only=SoupStrainer('a')): if link.has_attr('href'): - li=link['href'] - #print li + li = link['href'] + # print li if li.find('http://www.geeksforgeeks.org')==0 and li not in crawled and li.find('forums')<0: to_crawl.append(li) -#print to_crawl -print 
len(to_crawl) -count=0 +# print to_crawl +print(len(to_crawl)) +count = 0 +# Helper method to get page def get_page(page): - import urllib2 - source=urllib2.urlopen(page) + source = urllib2.urlopen(page) return source.read() - +# Helper method to save the pdf def save_as_pdf(s): global i try: client = pdfcrowd.Client("mkap1234", "fc5ada9fbd1c55f46822d6e9e985a9bb") output_file = open('amazon'+str(i)+'.pdf', 'wb') - i=i+1 - html=get_page(s) + i = i + 1 + html = get_page(s) client.convertHtml(html, output_file) output_file.close() - except pdfcrowd.Error,why: + except pdfcrowd.Error, why: print 'Failed:', why while len(to_crawl): - b=to_crawl.pop() + b = to_crawl.pop() if b.find('http://www.geeksforgeeks.org')==0 and b not in crawled and b.find('forums')<0: - count=count+1 - print count + count = count + 1 + print(count) crawled.append(b) status, response = http.request(b) for link in BeautifulSoup(response, parse_only=SoupStrainer('a')): @@ -64,14 +63,14 @@ def save_as_pdf(s): for st in crawled: if st.find('amazon')>=0 and st.find('#')<0 and st.find('tag')<0 and st.find('forum')<0: - print st + print(st) amazon.append(st) -print "Finished" -print len(amazon) +print("Processing Finished") +print(len(amazon)) - +# Saving all the pages fetched as pdf for page in amazon: save_as_pdf(page)