diff --git a/pypdf2htmlEX/core.py b/pypdf2htmlEX/core.py
index 977949d..ddfb67c 100644
--- a/pypdf2htmlEX/core.py
+++ b/pypdf2htmlEX/core.py
@@ -1,6 +1,7 @@
import subprocess
import os
from os.path import basename
+from typing import Union
class PDF():
def __init__(self, pdf_file_path:str, drm=None):
@@ -26,40 +27,55 @@ def add_extra_options(args, options):
if self.drm is True:
options = ["pdf2htmlEX", f"{self.file_path}", "--no-drm", "1"]
options = add_extra_options(args, options)
+ print(options)
+ for i in options:
+ print(i, end=" ")
subprocess.call(options)
else:
options = ["pdf2htmlEX", f"{self.file_path}"]
options = add_extra_options(args, options)
subprocess.call(options)
- def to_html(self, file_path=None):
+ def to_html(self, dest_dir=None, pdf_filename=None, new_file_name=None):
'''
Converts the PDF file to HTML.
Parameters:
- file_path (str, optional): The path to save the converted HTML file. Defaults to None will save in the same directory that pdf_file_path of PDF class.
+ dest_dir (str, optional): The path to save the converted HTML file. Defaults to None will save in the same directory that pdf_file_path of PDF class.
'''
- if file_path:
- args = ["--dest-dir", file_path]
+ if dest_dir:
+ args = ["--dest-dir", dest_dir]
self.__make_html(args)
else:
self.__make_html()
+ if pdf_filename and new_file_name:
+ try:
+ pdf_filename = pdf_filename.replace(".pdf", ".html")
+ os.rename(f"{dest_dir}/{pdf_filename}", f"{dest_dir}/{new_file_name}")
+ except Exception as e:
+ print(f"Error renaming {dest_dir}/{pdf_filename}: {e}")
-def dir_to_html(dir_path, dest_dir=None):
+def dir_to_html(dir_path, dest_dir=None, new_file_name=None):
'''
Converts all PDF files in a directory to HTML.
Parameters:
- dir_path (str): The path to the directory containing PDF files.
dest_dir (str, optional): The path to the destination directory to save the HTML files. Defaults to None will save in same dir.
+ new_file_name (str, optional): The new name for the HTML files. Generates sequential for pdfs, ex: pdf0, pdf1, pdf2. Defaults to None will maintly name them as is replacing .pdf for .html
'''
pdf_files = [f"{dir_path}/{file}" for file in os.listdir(dir_path) if file.endswith(".pdf")]
- for pdf_file in pdf_files:
+ for i, pdf_file in enumerate(pdf_files):
pdf = PDF(pdf_file, drm=True)
if dest_dir:
- pdf.to_html(file_path=f'{dest_dir}/{basename(pdf_file).replace(".pdf", ".html")}')
+ if new_file_name:
+ if '.html' not in new_file_name:
+ pdf.to_html(dest_dir=dest_dir, pdf_filename=basename(pdf_file), new_file_name=f"{new_file_name}_{i+1}.html")
+ else:
+ pdf.to_html(dest_dir=dest_dir, pdf_filename=basename(pdf_file), new_file_name=f'{new_file_name.replace(".html","")}_{i+1}.html')
+ else:
+ pdf.to_html(dest_dir=dest_dir)
else:
pdf.to_html()
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 35a5eaf..d213ac5 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@
readme = file.read()
setup(name='pypdf2htmlex',
- version='1.6',
+ version='1.8',
license='MIT License',
author='Gabriel Batistuta',
long_description=readme,