Skip to content

Commit 9188957

Browse files
Create pdf_to_text.py
1 parent da572c0 commit 9188957

File tree

1 file changed

+44
-0
lines changed

1 file changed

+44
-0
lines changed

Python/PDf2Txt/pdf_to_text.py

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,44 @@
1+
import os
2+
import PyPDF2
3+
4+
def _extract_pdf_text(pdf_path):
    """Return the extracted text of every page of the PDF at *pdf_path*.

    Pages for which ``extract_text()`` yields nothing (``None`` or an empty
    string) are left out; every kept page is followed by a newline.
    Any error raised while opening or parsing the file propagates to the
    caller.
    """
    with open(pdf_path, 'rb') as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        page_texts = (page.extract_text() for page in reader.pages)
        # Single join instead of repeated ``+=`` so large documents do not
        # pay for re-copying the accumulated string on every page.
        return ''.join(f"{page_text}\n" for page_text in page_texts if page_text)


# Convert every PDF that sits next to this script into a same-named .txt file.
folder_path = os.path.dirname(os.path.abspath(__file__))

# Counters reported in the summary at the end of the run.
total_pdfs = 0
converted = 0
skipped = 0
failed = 0

# os.listdir() returns entries in arbitrary order; sort for reproducible logs.
for filename in sorted(os.listdir(folder_path)):
    pdf_file_path = os.path.join(folder_path, filename)

    # Only regular files count as PDFs: a directory whose name happens to end
    # in ".pdf" must not inflate the totals or be reported as a failure.
    if not filename.lower().endswith('.pdf') or not os.path.isfile(pdf_file_path):
        continue

    total_pdfs += 1
    txt_file_path = os.path.join(folder_path, f"{os.path.splitext(filename)[0]}.txt")

    try:
        text = _extract_pdf_text(pdf_file_path)

        if text.strip():
            # An existing .txt with the same base name is overwritten.
            with open(txt_file_path, 'w', encoding='utf-8') as f:
                f.write(text)
            print(f"✅ Converted '{filename}' to '{os.path.basename(txt_file_path)}'")
            converted += 1
        else:
            # Nothing but whitespace was extracted, so no output file is written.
            print(f"⚠ Skipped '{filename}': no text found")
            skipped += 1

    except Exception as e:
        # Top-level boundary: one unreadable PDF must not abort the whole
        # batch, so report it and carry on with the next file.
        print(f"❌ Failed to convert '{filename}': {e}")
        failed += 1

print("\n--- Summary ---")
print(f"Total PDFs found: {total_pdfs}")
print(f"Converted: {converted}")
print(f"Skipped (no text): {skipped}")
print(f"Failed: {failed}")
print("Processing complete.")

0 commit comments

Comments
 (0)