kaushikj · juanfranblanco · Apr 2, 2024 · Apr 2, 2024 · Apr 2, 2024 · Apr 2, 2024
diff --git a/convertpdffolder.py b/convertpdffolder.py
@@ -0,0 +1,48 @@
+import os
+import glob
+import img2pdf
+import argparse
+
+def convert_images_to_pdf(image_folder, output_pdf_path):
+    """Combine the PNG images of a folder into a single PDF file.
+
+    Images are added in sorted path order. The destination file is only
+    opened once the conversion has succeeded. A missing-image or failure
+    condition is reported on stdout instead of being raised.
+
+    Args:
+        image_folder: Folder searched (non-recursively) for "*.png" files.
+        output_pdf_path: Path of the PDF file to create.
+    """
+    # Construct the full path for PNG images in the folder
+    image_paths = sorted(glob.glob(os.path.join(image_folder, "*.png")))
+
+    # Check if there are images to convert
+    if not image_paths:
+        print("No PNG images found in the specified folder.")
+        return
+
+    # Convert images to PDF
+    try:
+        # Convert before opening the destination: opening with "wb" truncates
+        # it, so a failed conversion would otherwise leave an empty PDF behind.
+        pdf_bytes = img2pdf.convert(image_paths)
+        with open(output_pdf_path, "wb") as f:
+            f.write(pdf_bytes)
+        print(f"PDF created successfully at {output_pdf_path}")
+    except Exception as e:
+        print(f"An error occurred during the PDF creation: {e}")
+
+if __name__ == "__main__":
+    # Initialize parser
+    parser = argparse.ArgumentParser(description="Convert PNG images in a folder to a single PDF file.")
+
+    # Adding arguments
+    parser.add_argument("image_folder", type=str, help="The folder containing PNG images to be converted to PDF.")
+    parser.add_argument("output_pdf_path", type=str, help="The path to save the output PDF file.")
+
+    # Parse arguments
+    args = parser.parse_args()
+
+    # Call function with provided arguments
+    convert_images_to_pdf(args.image_folder, args.output_pdf_path)
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,3 @@
 imutils==0.5.4
-opencv_python==4.5.2.52
+opencv_python==4.9.0.80
 img2pdf==0.4.1
diff --git a/video2pdfslides.py b/video2pdfslides.py
@@ -110,7 +110,7 @@ def detect_unique_screenshots(video_path, output_folder_screenshot_path):
 
 def initialize_output_folder(video_path):
     '''Clean the output folder if already exists'''
-    output_folder_screenshot_path = f"{OUTPUT_SLIDES_DIR}/{video_path.rsplit('/')[-1].split('.')[0]}"
+    output_folder_screenshot_path = f"{OUTPUT_SLIDES_DIR}/{video_path.rsplit('/')[-1].rsplit('.', 1)[0]}"
 
     if os.path.exists(output_folder_screenshot_path):
         shutil.rmtree(output_folder_screenshot_path)
@@ -121,7 +121,7 @@ def initialize_output_folder(video_path):
 
 
 def convert_screenshots_to_pdf(output_folder_screenshot_path):
-    output_pdf_path = f"{OUTPUT_SLIDES_DIR}/{video_path.rsplit('/')[-1].split('.')[0]}" + '.pdf'
+    output_pdf_path = f"{OUTPUT_SLIDES_DIR}/{video_path.rsplit('/')[-1].rsplit('.', 1)[0]}" + '.pdf'
     print('output_folder_screenshot_path', output_folder_screenshot_path)
     print('output_pdf_path', output_pdf_path)
     print('converting images to pdf..')
@@ -147,14 +147,14 @@ def convert_screenshots_to_pdf(output_folder_screenshot_path):
     output_folder_screenshot_path = initialize_output_folder(video_path)
     detect_unique_screenshots(video_path, output_folder_screenshot_path)
 
-    print('Please Manually verify screenshots and delete duplicates')
-    while True:
-        choice = input("Press y to continue and n to terminate")
-        choice = choice.lower().strip()
-        if choice in ['y', 'n']:
-            break
-        else:
-            print('please enter a valid choice')
-
-    if choice == 'y':
-        convert_screenshots_to_pdf(output_folder_screenshot_path)
+#    print('Please Manually verify screenshots and delete duplicates')
+#    while True:
+#       choice = input("Press y to continue and n to terminate")
+#       choice = choice.lower().strip()
+#       if choice in ['y', 'n']:
+#            break
+#        else:
+#            print('please enter a valid choice')
+#
+#    if choice == 'y':
+    convert_screenshots_to_pdf(output_folder_screenshot_path)
diff --git a/video2pdfslidesfolder.py b/video2pdfslidesfolder.py
@@ -0,0 +1,132 @@
+import os
+import time
+import cv2
+import imutils
+import shutil
+import img2pdf
+import glob
+import argparse
+
+# Define constants
+OUTPUT_SLIDES_DIR = "./output"
+FRAME_RATE = 5  # Frames per second to be processed
+WARMUP = FRAME_RATE  # Initial number of frames to be skipped
+FGBG_HISTORY = FRAME_RATE * 6  # Number of frames in background object
+VAR_THRESHOLD = 16  # Threshold on the squared Mahalanobis distance
+DETECT_SHADOWS = False  # If true, the algorithm will detect shadows
+MIN_PERCENT = 0.2  # Min % of diff to detect motion stop
+MAX_PERCENT = 0.6  # Max % of diff for motion
+VIDEO_EXTENSIONS = ('.mp4', '.avi', '.mov', '.webm')  # Matched case-insensitively
+
+def sanitize_filename(filename):
+    """Remove characters invalid in Windows paths, plus the glob wildcards '[' and ']'."""
+    invalid_chars = ['<', '>', ':', '"', '/', '\\', '|', '?', '*', '[', ']']
+    for ch in invalid_chars:
+        filename = filename.replace(ch, '')
+    return filename
+
+def get_frames(video_path):
+    """Yield frames sampled from the video FRAME_RATE times per second.
+
+    Yields:
+        (frame_count, frame_time, frame): the 1-based sample index, the
+        sample position in seconds plus one sampling interval, and the image.
+
+    Raises:
+        Exception: if the video cannot be opened.
+    """
+    vs = cv2.VideoCapture(video_path)
+    if not vs.isOpened():
+        raise Exception(f'Unable to open file {video_path}')
+    try:
+        total_frames = vs.get(cv2.CAP_PROP_FRAME_COUNT)
+        frame_time = 0
+        frame_count = 0
+        print("Total frames:", total_frames)
+        print("FRAME_RATE:", FRAME_RATE)
+        while True:
+            # Seek by timestamp so frames are sampled at FRAME_RATE per second
+            vs.set(cv2.CAP_PROP_POS_MSEC, frame_time * 1000)
+            frame_time += 1 / FRAME_RATE
+            ret, frame = vs.read()
+            if not ret:
+                break
+            frame_count += 1
+            yield frame_count, frame_time, frame
+    finally:
+        # Release the capture even if the consumer stops early or fails
+        vs.release()
+
+def detect_unique_screenshots(video_path, output_folder_screenshot_path):
+    """Save a screenshot each time the picture settles after a change.
+
+    A frame is saved when the share of non-zero mask pixels drops below
+    MIN_PERCENT; capturing is re-armed once it reaches MAX_PERCENT.
+    """
+    fgbg = cv2.createBackgroundSubtractorMOG2(history=FGBG_HISTORY, varThreshold=VAR_THRESHOLD, detectShadows=DETECT_SHADOWS)
+    captured = False
+    start_time = time.time()
+    W, H = None, None
+    screenshots_count = 0
+    for frame_count, frame_time, frame in get_frames(video_path):
+        orig = frame.copy()
+        frame = imutils.resize(frame, width=600)
+        mask = fgbg.apply(frame)
+        if W is None or H is None:
+            H, W = mask.shape[:2]
+        p_diff = (cv2.countNonZero(mask) / float(W * H)) * 100
+        if p_diff < MIN_PERCENT and not captured and frame_count > WARMUP:
+            captured = True
+            filename = f"{screenshots_count:03}_{round(frame_time / 60, 2)}.png"
+            path = os.path.join(output_folder_screenshot_path, filename)
+            print("Saving:", path)
+            cv2.imwrite(path, orig)
+            screenshots_count += 1
+        elif captured and p_diff >= MAX_PERCENT:
+            captured = False
+    print(f'{screenshots_count} screenshots captured in {time.time() - start_time}s.')
+
+def initialize_output_folder(video_path):
+    """Create (or reset) the screenshot folder for a video and return its path."""
+    folder_name = os.path.splitext(os.path.basename(video_path))[0]
+    # Fall back to a fixed name: an empty name would resolve to OUTPUT_SLIDES_DIR
+    # itself and the rmtree below would wipe every previous result.
+    sanitized_folder_name = sanitize_filename(folder_name) or "untitled"
+    output_folder_screenshot_path = os.path.join(OUTPUT_SLIDES_DIR, sanitized_folder_name)
+    if os.path.exists(output_folder_screenshot_path):
+        shutil.rmtree(output_folder_screenshot_path)
+    os.makedirs(output_folder_screenshot_path, exist_ok=True)
+    print('Initialized output folder:', output_folder_screenshot_path)
+    return output_folder_screenshot_path
+
+def convert_screenshots_to_pdf(output_folder_screenshot_path, video_path):
+    """Write the folder's PNG screenshots, in sorted order, to '<folder>.pdf'.
+
+    video_path is accepted for call compatibility but is not used.
+    """
+    images = sorted(glob.glob(os.path.join(output_folder_screenshot_path, "*.png")))
+    if images:
+        output_pdf_path = f"{output_folder_screenshot_path}.pdf"
+        print('Converting images to PDF:', output_pdf_path)
+        # Convert first so a failed conversion does not leave an empty PDF
+        pdf_bytes = img2pdf.convert(images)
+        with open(output_pdf_path, "wb") as f:
+            f.write(pdf_bytes)
+        print('PDF created at:', output_pdf_path)
+    else:
+        print(f"No PNG files found in {output_folder_screenshot_path}. PDF not created.")
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Convert video files in a directory to PDF slides")
+    parser.add_argument("dir_path", help="Directory path containing video files", type=str)
+    args = parser.parse_args()
+    directory_path = args.dir_path
+
+    for video_file in sorted(os.listdir(directory_path)):
+        video_path = os.path.join(directory_path, video_file)
+        # Compare extensions case-insensitively so e.g. "TALK.MP4" is not skipped
+        if os.path.isfile(video_path) and video_file.lower().endswith(VIDEO_EXTENSIONS):
+            print('Processing video:', video_path)
+            output_folder_screenshot_path = initialize_output_folder(video_path)
+            detect_unique_screenshots(video_path, output_folder_screenshot_path)
+            convert_screenshots_to_pdf(output_folder_screenshot_path, video_path)