Merge pull request #82 from iamnehalien29/main

devmalik7 · web-flow · commit dcc691152a6a · 2025-10-23T14:13:30.000+05:30
PDf2Txt
diff --git a/Python/PDf2Txt/README.md b/Python/PDf2Txt/README.md
@@ -0,0 +1,24 @@
+# PDF to Text Converter
+
+This Python project automatically converts every PDF file located in the same folder as the script into a plain text file using **PyPDF2**. Each PDF is processed page by page, and the extracted text is saved as a `.txt` file with the same base name as the PDF.
+
+---
+
+## Features
+
+- Automatically processes **all PDFs in the folder**.
+- Skips PDFs with **no extractable text**.
+- Handles errors gracefully and prints a **summary**.
+- Creates `.txt` files named after the original PDF.
+
+---
+
+## Requirements
+
+- Python 3.7 or higher
+- PyPDF2 library
+
+Install PyPDF2 with pip:
+```bash
+pip install PyPDF2
+```
diff --git a/Python/PDf2Txt/Screenshot 2025-10-17 210928.png b/Python/PDf2Txt/Screenshot 2025-10-17 210928.png
diff --git a/Python/PDf2Txt/pdf_to_text.py b/Python/PDf2Txt/pdf_to_text.py
@@ -0,0 +1,44 @@
+import os
+import PyPDF2
+
+folder_path = os.path.dirname(os.path.abspath(__file__))
+
+total_pdfs = 0
+converted = 0
+skipped = 0
+failed = 0
+
+for filename in os.listdir(folder_path):
+    if filename.lower().endswith('.pdf'):
+        total_pdfs += 1
+        pdf_file_path = os.path.join(folder_path, filename)
+        txt_file_path = os.path.join(folder_path, f"{os.path.splitext(filename)[0]}.txt")
+        
+        try:
+            with open(pdf_file_path, 'rb') as file:
+                reader = PyPDF2.PdfReader(file)
+                text = ''
+                for page in reader.pages:
+                    page_text = page.extract_text()
+                    if page_text:
+                        text += page_text + '\n'
+            
+            if text.strip():
+                with open(txt_file_path, 'w', encoding='utf-8') as f:
+                    f.write(text)
+                print(f"✅ Converted '{filename}' to '{os.path.basename(txt_file_path)}'")
+                converted += 1
+            else:
+                print(f"⚠ Skipped '{filename}': no text found")
+                skipped += 1
+        
+        except Exception as e:
+            print(f"❌ Failed to convert '{filename}': {e}")
+            failed += 1
+
+print("\n--- Summary ---")
+print(f"Total PDFs found: {total_pdfs}")
+print(f"Converted: {converted}")
+print(f"Skipped (no text): {skipped}")
+print(f"Failed: {failed}")
+print("Processing complete.")