File tree Expand file tree Collapse file tree 1 file changed +44
-0
lines changed
Expand file tree Collapse file tree 1 file changed +44
-0
lines changed Original file line number Diff line number Diff line change 1+ import os
2+ import PyPDF2
3+
def join_page_texts(page_texts):
    """Join per-page text into one string, ending each kept page with a newline.

    Args:
        page_texts: Iterable of per-page strings. Falsy entries (``None`` or
            ``''``) are dropped, matching pages that yielded no text.

    Returns:
        The concatenated text, or ``''`` when no page had any text.
    """
    # str.join avoids repeated string reallocation from += in a loop.
    return ''.join(f"{page_text}\n" for page_text in page_texts if page_text)


def _extract_pdf_text(pdf_file_path):
    """Return the text extracted from every page of the PDF at *pdf_file_path*.

    Raises whatever ``open`` or PyPDF2 raise for unreadable or malformed files;
    the caller treats any such error as a failed conversion.
    """
    with open(pdf_file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Join inside the ``with`` block so pages are read while the file is open.
        return join_page_texts(page.extract_text() for page in reader.pages)


def convert_pdfs_in_folder(folder_path):
    """Convert each ``*.pdf`` directly inside *folder_path* to a sibling ``.txt``.

    The extension match is case-insensitive. A PDF with no extractable text is
    skipped and no ``.txt`` file is written for it. An existing ``.txt`` with
    the same base name is overwritten.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A tuple ``(total_pdfs, converted, skipped, failed)``.
    """
    total_pdfs = converted = skipped = failed = 0

    # Sorted so the progress output is deterministic; os.listdir order is arbitrary.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith('.pdf'):
            continue

        total_pdfs += 1
        pdf_file_path = os.path.join(folder_path, filename)
        # No space before the extension: 'report.pdf' -> 'report.txt'.
        txt_file_path = os.path.join(
            folder_path, f"{os.path.splitext(filename)[0]}.txt"
        )

        try:
            text = _extract_pdf_text(pdf_file_path)

            if text.strip():
                with open(txt_file_path, 'w', encoding='utf-8') as f:
                    f.write(text)
                print(f"✅ Converted '{filename}' to '{os.path.basename(txt_file_path)}'")
                converted += 1
            else:
                print(f"⚠ Skipped '{filename}': no text found")
                skipped += 1

        except Exception as e:
            # Deliberately broad: one unreadable or malformed PDF must not
            # abort the rest of the batch. The error is reported and counted.
            print(f"❌ Failed to convert '{filename}': {e} ")
            failed += 1

    return total_pdfs, converted, skipped, failed


# The script processes the directory that contains this file.
folder_path = os.path.dirname(os.path.abspath(__file__))

total_pdfs, converted, skipped, failed = convert_pdfs_in_folder(folder_path)

print("\n --- Summary ---")
print(f"Total PDFs found: {total_pdfs} ")
print(f"Converted: {converted} ")
print(f"Skipped (no text): {skipped} ")
print(f"Failed: {failed} ")
print("Processing complete.")
You can’t perform that action at this time.
0 commit comments