-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconverter1.py
43 lines (22 loc) · 1.13 KB
/
converter1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# -*- coding: utf-8 -*-
import PyPDF2
import os
# Convert a PDF to plain text.
#
# Prompts for the PDF path and an optional output path, extracts the text of
# every page with PyPDF2, appends it to the output file and echoes it to
# stdout. When no output path is given, the text goes to
# "temp/<pdf name>.txt" (relative to the current working directory).

# Make sure the default output directory exists. exist_ok avoids the
# check-then-create race of a separate isdir() test.
os.makedirs("temp", exist_ok=True)

pdfpath = input("Enter the name of your pdf file - please use backslash when typing in directory path: ")  # Provide the path for your pdf here
txtpath = input("Enter the name of your txt file - please use backslash when typing in directory path: ")  # Provide the path for the output text file
BASEDIR = os.path.realpath("temp")  # This is the sample base directory where all your text files will be stored if you do not give a specific path
print(BASEDIR)

if not txtpath:
    # Derive the output name from the PDF name. Only a trailing ".pdf"
    # extension (any letter case) is dropped; a ".pdf" appearing elsewhere
    # in the name, or any other extension, is kept as-is.
    pdfname = os.path.basename(os.path.normpath(pdfpath))
    stem, ext = os.path.splitext(pdfname)
    if ext.lower() != ".pdf":
        stem = pdfname
    txtpath = os.path.join(BASEDIR, stem + ".txt")

# The context manager closes the PDF even if parsing or extraction fails.
with open(pdfpath, "rb") as pdfobj:
    if hasattr(PyPDF2, "PdfReader"):
        # Current PyPDF2 API; the legacy names below were removed in 3.0.
        pdfread = PyPDF2.PdfReader(pdfobj)
        pages = pdfread.pages
    else:
        # Legacy PyPDF2 releases that only provide the camelCase API.
        pdfread = PyPDF2.PdfFileReader(pdfobj)
        pages = (pdfread.getPage(i) for i in range(pdfread.numPages))

    # Open the output once (append mode, as before) instead of once per page.
    # The explicit UTF-8 encoding keeps non-ASCII text from failing on
    # platforms whose default encoding cannot represent it.
    with open(txtpath, "a", encoding="utf-8") as f:
        for pageObj in pages:
            # Prefer the current method name, fall back to the legacy one.
            extract = getattr(pageObj, "extract_text", None) or pageObj.extractText
            # Extract once and reuse: extraction is the expensive step.
            text = extract()
            f.write(text)
            print(text)  # This just provides the overview of what is being added to your output, you can remove it if want