From c3e1b46ed9d39c8215ee228d4573e077cc897459 Mon Sep 17 00:00:00 2001
From: bendsouza <bendsouza2@gmail.com>
Date: Wed, 4 Dec 2024 17:32:08 +0000
Subject: [PATCH 01/14] fix path to lambda_handler in dockerfile

---
 python/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/Dockerfile b/python/Dockerfile
index 8c98c7f..536a1e9 100644
--- a/python/Dockerfile
+++ b/python/Dockerfile
@@ -14,4 +14,4 @@ RUN pip install --no-cache-dir -r /var/task/requirements.txt
 # Copy application code
 COPY . /var/task
 
-CMD ["lambda_handler.lambda_handler"]
\ No newline at end of file
+CMD ["python.lambda_handler.lambda_handler"]
\ No newline at end of file

From 9af0a217fd1831e631e0316ef06becd9468deb71 Mon Sep 17 00:00:00 2001
From: bendsouza <bendsouza2@gmail.com>
Date: Wed, 4 Dec 2024 18:49:19 +0000
Subject: [PATCH 02/14] add enchant-devel install to fix enchant lib import

---
 python/Dockerfile | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/python/Dockerfile b/python/Dockerfile
index 536a1e9..dda26b0 100644
--- a/python/Dockerfile
+++ b/python/Dockerfile
@@ -2,9 +2,19 @@ FROM public.ecr.aws/lambda/python:3.10-arm64
 
 WORKDIR /var/task
 
-# Install system dependencies
+# Install system dependencies (including enchant C library and build tools)
 RUN yum update -y && \
-    yum install -y wget gnupg gcc python3-devel mysql-devel mariadb-devel libsndfile ffmpeg && \
+    yum install -y \
+    wget \
+    gnupg \
+    gcc \
+    python3-devel \
+    mysql-devel \
+    mariadb-devel \
+    libsndfile \
+    ffmpeg \
+    enchant-devel \   
+    make && \
     yum clean all
 
 # Install Python dependencies
@@ -14,4 +24,4 @@ RUN pip install --no-cache-dir -r /var/task/requirements.txt
 # Copy application code
 COPY . /var/task
 
-CMD ["python.lambda_handler.lambda_handler"]
\ No newline at end of file
+CMD ["python.lambda_handler.lambda_handler"]

From bfbc90bff619b3090ef8078789a230e2804bfae5 Mon Sep 17 00:00:00 2001
From: bendsouza <bendsouza2@gmail.com>
Date: Wed, 4 Dec 2024 18:49:44 +0000
Subject: [PATCH 03/14] aws creds hierarchy - ensure local methods not used
 when running in aws

---
 python/s3_organiser.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/s3_organiser.py b/python/s3_organiser.py
index b2163df..aecd172 100644
--- a/python/s3_organiser.py
+++ b/python/s3_organiser.py
@@ -11,7 +11,9 @@
 
 dotenv.load_dotenv()
 
-if (public_key := os.getenv("AWS_PUBLIC_KEY") is not None) and (
-        secret_key := os.getenv("AWS_SECRET_KEY") is not None):
+if utils.is_running_on_aws() is True:
+    session = boto3.Session()
+elif (public_key := os.getenv("AWS_PUBLIC_KEY")) is not None and (  # bind the key string, not the comparison's bool
+        secret_key := os.getenv("AWS_SECRET_KEY")) is not None:
     session = boto3.Session(
         aws_access_key_id=public_key,

From e8bc76ff9e79c0786eb51c97b848d04b06bebbd0 Mon Sep 17 00:00:00 2001
From: bendsouza <bendsouza2@gmail.com>
Date: Wed, 4 Dec 2024 18:50:48 +0000
Subject: [PATCH 04/14] comments

---
 python/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/Dockerfile b/python/Dockerfile
index dda26b0..b5aefa4 100644
--- a/python/Dockerfile
+++ b/python/Dockerfile
@@ -2,7 +2,7 @@ FROM public.ecr.aws/lambda/python:3.10-arm64
 
 WORKDIR /var/task
 
-# Install system dependencies (including enchant C library and build tools)
+# Install system dependencies
 RUN yum update -y && \
     yum install -y \
     wget \

From 72a2f5df6184a741fb8704125b389969587042b2 Mon Sep 17 00:00:00 2001
From: bendsouza <bendsouza2@gmail.com>
Date: Wed, 4 Dec 2024 19:46:18 +0000
Subject: [PATCH 05/14] add event and context args to lambda handler

---
 python/lambda_handler.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/python/lambda_handler.py b/python/lambda_handler.py
index 3228dcc..314b92c 100644
--- a/python/lambda_handler.py
+++ b/python/lambda_handler.py
@@ -1,6 +1,7 @@
 import os
 import logging
 import traceback
+from typing import Dict, Any
 
 import MySQLdb
 
@@ -11,9 +12,13 @@
 logger.setLevel(logging.INFO)
 
 
-def lambda_handler():
+def lambda_handler(event: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
     """
     Lambda entry point to process video, upload to YouTube, and write metadata to db.
+    :param event: The event data passed to the lambda function
+    :param context: The context object providing runtime information about the Lambda execution, such as the function
+    name, request ID and remaining execution time.
+    :returns: A dictionary with a `statusCode` and `body` containing the result of the Lambda execution.
     """
     try:
         required_env_vars = ["DB_HOST", "DB_USER", "DB_PASSWORD", "DB_NAME"]

From 6dde758445ac7eb2fd67895999f007d5dfc3a2a7 Mon Sep 17 00:00:00 2001
From: bendsouza <bendsouza2@gmail.com>
Date: Mon, 9 Dec 2024 15:10:50 +0000
Subject: [PATCH 06/14] add logging

---
 python/Dockerfile        | 22 +++++++++++++--
 python/custom_logging.py | 59 ++++++++++++++++++++++++++++++++++++++++
 python/s3_organiser.py   |  2 ++
 python/word_generator.py | 30 +++++++++++++++-----
 4 files changed, 104 insertions(+), 9 deletions(-)
 create mode 100644 python/custom_logging.py

diff --git a/python/Dockerfile b/python/Dockerfile
index b5aefa4..9a2b7a3 100644
--- a/python/Dockerfile
+++ b/python/Dockerfile
@@ -2,7 +2,10 @@ FROM public.ecr.aws/lambda/python:3.10-arm64
 
 WORKDIR /var/task
 
-# Install system dependencies
+RUN curl -fsSL https://rpm.nodesource.com/setup_16.x | bash - && \
+    yum install -y nodejs
+
+# Install system-level dependencies
 RUN yum update -y && \
     yum install -y \
     wget \
@@ -13,10 +16,25 @@ RUN yum update -y && \
     mariadb-devel \
     libsndfile \
     ffmpeg \
-    enchant-devel \   
+    enchant-devel \
+    aspell-esp \
+    aspell-es \
+    hunspell-es \
     make && \
     yum clean all
 
+RUN mkdir -p /usr/share/hunspell && \
+    curl -o /usr/share/hunspell/es_ES.dic https://cgit.freedesktop.org/libreoffice/dictionaries/plain/es/es_ES.dic && \
+    curl -o /usr/share/hunspell/es_ES.aff https://cgit.freedesktop.org/libreoffice/dictionaries/plain/es/es_ES.aff
+
+ENV ENCHANT_CONFIG_DIR=/usr/share/hunspell
+ENV ENCHANT_DATA_DIR=/usr/share/hunspell
+
+# node.js dependencies
+COPY node/package.json /var/task/node/
+RUN cd /var/task/node && npm install
+
+
 # Install Python dependencies
 COPY requirements.txt /var/task/requirements.txt
 RUN pip install --no-cache-dir -r /var/task/requirements.txt
diff --git a/python/custom_logging.py b/python/custom_logging.py
new file mode 100644
index 0000000..0d6889a
--- /dev/null
+++ b/python/custom_logging.py
@@ -0,0 +1,59 @@
+from typing import Callable, Type
+import logging
+from functools import wraps
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+def get_logger(module_name: str) -> logging.Logger:
+    """
+    Return the ``module_name`` logger; a formatted StreamHandler and INFO level are set when ``hasHandlers()`` is false.
+
+    Args:
+        module_name (str): Name of the module requesting the logger.
+
+    Returns:
+        logging.Logger: Configured logger instance.
+    """
+    logger = logging.getLogger(module_name)
+    if not logger.hasHandlers():  # Prevents adding handlers multiple times if reused
+        handler = logging.StreamHandler()  # Logs to stdout by default
+        formatter = logging.Formatter(
+            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        )
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+        logger.setLevel(logging.INFO)  # Set your desired logging level here
+    return logger
+
+
+def log_execution(func: Callable) -> Callable:
+    """Decorator that logs an INFO line on entry to and on normal return from ``func`` (not when it raises)."""
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        logger.info("Entering: %s", func.__qualname__)  # lazy %-args: formatted only if the record is emitted
+        result = func(*args, **kwargs)
+        logger.info("Exiting: %s", func.__qualname__)
+        return result
+    return wrapper
+
+
+def log_all_methods(cls: Type):
+    """Class decorator: wrap the functions, static/class methods and properties of ``cls`` with `log_execution`."""
+    for attr_name, attr_value in list(vars(cls).items()):  # snapshot: setattr below rewrites the class dict
+        if isinstance(attr_value, property):
+            # Wrap each accessor that exists; keep the deleter and docstring of the original property.
+            getter = log_execution(attr_value.fget) if attr_value.fget else None
+            setter = log_execution(attr_value.fset) if attr_value.fset else None
+            deleter = log_execution(attr_value.fdel) if attr_value.fdel else None
+            setattr(cls, attr_name, property(getter, setter, deleter, attr_value.__doc__))
+        elif isinstance(attr_value, (staticmethod, classmethod)):
+            # Checked before callable(): classmethod objects are not callable, so they were never wrapped before.
+            setattr(cls, attr_name, type(attr_value)(log_execution(attr_value.__func__)))
+        elif callable(attr_value) and not isinstance(attr_value, type):  # leave nested classes as classes
+            setattr(cls, attr_name, log_execution(attr_value))
+    return cls
+
+
+
diff --git a/python/s3_organiser.py b/python/s3_organiser.py
index aecd172..fd06b8e 100644
--- a/python/s3_organiser.py
+++ b/python/s3_organiser.py
@@ -8,6 +8,7 @@
 from botocore.exceptions import ClientError
 
 from python import utils
+from python import custom_logging
 
 dotenv.load_dotenv()
 
@@ -25,6 +26,7 @@
     )
 
 
+@custom_logging.log_all_methods
 class BucketSort:
     """
     Class for reading and writing to S3
diff --git a/python/word_generator.py b/python/word_generator.py
index 24826ae..62d5307 100644
--- a/python/word_generator.py
+++ b/python/word_generator.py
@@ -26,12 +26,14 @@
 from python.language_verification import LanguageVerification
 from python.s3_organiser import BucketSort
 from python import utils
+from python import custom_logging
 import base_config
 
 
 Image.ANTIALIAS = Image.Resampling.LANCZOS  # type: ignore[attr-defined]
 
 
+@custom_logging.log_all_methods
 class Audio:
     def __init__(self,
                  word_list_path: str,
@@ -57,8 +59,8 @@ def __init__(self,
         self.translated_sentence = self.google_translate(
             source_language=self.language_to_learn, target_language=self.native_language
         )
+        self.audio_duration = None
         self.audio_path = self.text_to_speech(language=self.language_to_learn)
-        self.audio_duration = self.get_audio_duration()
         self.sub_filepath = self.echogarden_generate_subtitles(sentence=self.sentence)
 
     @property
@@ -79,8 +81,10 @@ def text_to_speech(self, language: str, filepath: Optional[str] = None) -> str:
         :param filepath: Optional, the filepath to save the resulting .mp3 file to
         """
         dt = datetime.utcnow().strftime("%m-%d-%Y %H:%M:%S")
-        if filepath is None:
+        if filepath is None and self.cloud_storage is False:
             filepath = f"{base_config.BASE_DIR}/{Paths.AUDIO_DIR_PATH}/{dt}.wav"
+        elif filepath is None and self.cloud_storage is True:
+            filepath = f"/tmp/{dt}.wav"
         tts = gTTS(self.sentence, lang=language)
 
         if self.cloud_storage:
@@ -88,12 +92,10 @@ def text_to_speech(self, language: str, filepath: Optional[str] = None) -> str:
             tts.write_to_fp(audio_buffer)
             audio_buffer.seek(0)
 
-            s3_key = f"{Paths.AUDIO_DIR_PATH}/{dt}"
+            s3_key = f"{Paths.AUDIO_DIR_PATH}/{dt}.wav"
             s3_bucket = BucketSort(bucket=BUCKET_NAME)
             s3_path = s3_bucket.push_object_to_s3(audio_buffer.read(), s3_key)
 
-            return s3_path
-
         tts.save(filepath)
         return filepath
 
@@ -122,6 +124,8 @@ def generate_srt_file(self, total_syllable_count: int) -> str:
         Writes the sentence to a .srt subtitle file
         :param total_syllable_count: The total number of syllables in the audio
         """
+        if self.audio_duration is None:
+            self.audio_duration = self.get_audio_duration()
         syllables_per_second = self.audio_duration / total_syllable_count
         subtitle_length = 3
         words = self.sentence.split(" ")
@@ -175,15 +179,25 @@ def echogarden_generate_subtitles(self, sentence: str) -> str:
         :return: The output_file_path that the .srt file was written to if successfully generated, else None
         """
         dt = datetime.utcnow().strftime("%m-%d-%Y %H:%M:%S")
-        output_file_path = f"{base_config.BASE_DIR}/{Paths.SUBTITLE_DIR_PATH}/{dt}.srt"
+        if self.cloud_storage is False:
+            output_file_path = f"{base_config.BASE_DIR}/{Paths.SUBTITLE_DIR_PATH}/{dt}.srt"
+        else:
+            output_file_path = f"/tmp/{dt}.srt"
         file_to_execute = f"{base_config.BASE_DIR}/{Paths.NODE_SUBS_FILE_PATH}"
+        for log_path in [file_to_execute, self.audio_path]:
+            no_path = []
+            if not os.path.exists(log_path):
+                no_path.append(log_path)
+        if len(no_path) > 0:
+            raise FileNotFoundError(f"paths {no_path} do not exist")
+
         command = ["node", file_to_execute, self.audio_path, sentence, output_file_path]
         try:
             result = subprocess.run(command, check=True, capture_output=True, text=True)
         except subprocess.CalledProcessError as e:
             raise subprocess.CalledProcessError(
                 e.returncode, e.cmd, stderr=f"Command failed with exit code {e.returncode}. stderr {e.stderr}"
-            )
+            ) from e
 
         if self.cloud_storage is True:
             s3_bucket = BucketSort(bucket=BUCKET_NAME)
@@ -337,6 +351,7 @@ def google_translate(
         return translated_sentence
 
 
+@custom_logging.log_all_methods
 class ImageGenerator:
     """
     Can be used to generate and store images
@@ -438,6 +453,7 @@ def _check_valid_image_path(self):
             raise NotImplementedError
 
 
+@custom_logging.log_all_methods
 class VideoGenerator:
     """Class for generating videos"""
 

From be300e54fb2e93e17c3240ca19a342c0db6faa39 Mon Sep 17 00:00:00 2001
From: bendsouza <bendsouza2@gmail.com>
Date: Mon, 9 Dec 2024 15:12:44 +0000
Subject: [PATCH 07/14] add dockerfile for node env

---
 node/Dockerfile | 53 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 node/Dockerfile

diff --git a/node/Dockerfile b/node/Dockerfile
new file mode 100644
index 0000000..3199fca
--- /dev/null
+++ b/node/Dockerfile
@@ -0,0 +1,53 @@
+FROM python:3.10-slim
+
+WORKDIR /var/task
+
+# System dependencies. NOTE(review): the python3* apt packages duplicate the base image's interpreter - confirm needed
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    wget \
+    curl \
+    gnupg \
+    gcc \
+    g++ \
+    make \
+    python3 \
+    python3-dev \
+    python3-pip \
+    python3-venv \
+    mariadb-client \
+    libmariadb-dev \
+    libsndfile1 \
+    ffmpeg \
+    libenchant-2-2 \
+    aspell-es \
+    hunspell-es && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install Node.js 22.x from the NodeSource apt repository, upgrade npm, then drop the refreshed apt lists
+RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \
+    apt-get install -y nodejs && \
+    npm install -g npm@latest && rm -rf /var/lib/apt/lists/*
+
+# Set Enchant configuration paths
+ENV ENCHANT_CONFIG_DIR=/usr/share/hunspell
+ENV ENCHANT_DATA_DIR=/usr/share/hunspell
+
+# Spanish dictionaries (-f fails the build on an HTTP error instead of saving the error page as a dictionary)
+RUN mkdir -p /usr/share/hunspell && \
+    curl -fsSL -o /usr/share/hunspell/es_ES.dic https://cgit.freedesktop.org/libreoffice/dictionaries/plain/es/es_ES.dic && \
+    curl -fsSL -o /usr/share/hunspell/es_ES.aff https://cgit.freedesktop.org/libreoffice/dictionaries/plain/es/es_ES.aff
+
+# Node.js dependencies
+COPY node/package.json /var/task/node/
+RUN cd /var/task/node && npm install
+
+# Python dependencies
+COPY lambda-requirements.txt /var/task/requirements.txt
+RUN pip3 install --no-cache-dir -r /var/task/requirements.txt
+
+COPY . /var/task
+
+CMD ["python3", "-m", "python.lambda_handler"]
+

From 3bbb8f6566d6945b93e2225dd3d349d5c91d4d2c Mon Sep 17 00:00:00 2001
From: bendsouza <bendsouza2@gmail.com>
Date: Mon, 9 Dec 2024 15:12:52 +0000
Subject: [PATCH 08/14] comments

---
 python/Dockerfile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/Dockerfile b/python/Dockerfile
index 9a2b7a3..65342b3 100644
--- a/python/Dockerfile
+++ b/python/Dockerfile
@@ -39,7 +39,6 @@ RUN cd /var/task/node && npm install
 COPY requirements.txt /var/task/requirements.txt
 RUN pip install --no-cache-dir -r /var/task/requirements.txt
 
-# Copy application code
 COPY . /var/task
 
 CMD ["python.lambda_handler.lambda_handler"]

From 2aa527213252bc2ab089a4229ff63f6eac35d65f Mon Sep 17 00:00:00 2001
From: bendsouza <bendsouza2@gmail.com>
Date: Mon, 9 Dec 2024 15:14:07 +0000
Subject: [PATCH 09/14] logging comments

---
 python/custom_logging.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/custom_logging.py b/python/custom_logging.py
index 0d6889a..3f68f5e 100644
--- a/python/custom_logging.py
+++ b/python/custom_logging.py
@@ -17,14 +17,14 @@ def get_logger(module_name: str) -> logging.Logger:
         logging.Logger: Configured logger instance.
     """
     logger = logging.getLogger(module_name)
-    if not logger.hasHandlers():  # Prevents adding handlers multiple times if reused
-        handler = logging.StreamHandler()  # Logs to stdout by default
+    if not logger.hasHandlers():
+        handler = logging.StreamHandler()
         formatter = logging.Formatter(
             "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
         )
         handler.setFormatter(formatter)
         logger.addHandler(handler)
-        logger.setLevel(logging.INFO)  # Set your desired logging level here
+        logger.setLevel(logging.INFO)  
     return logger
 
 

From bf9371637683ee1432af628d576cefcaa2d31f9c Mon Sep 17 00:00:00 2001
From: bendsouza <bendsouza2@gmail.com>
Date: Mon, 9 Dec 2024 15:44:46 +0000
Subject: [PATCH 10/14] deprecate echogarden_generate_subs - use inbuilt
 subtitle generation from videogenerator class instead

---
 python/custom_logging.py |  2 +-
 python/main.py           |  1 -
 python/word_generator.py | 39 +++++++++++++++++++++++++--------------
 3 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/python/custom_logging.py b/python/custom_logging.py
index 3f68f5e..8427263 100644
--- a/python/custom_logging.py
+++ b/python/custom_logging.py
@@ -24,7 +24,7 @@ def get_logger(module_name: str) -> logging.Logger:
         )
         handler.setFormatter(formatter)
         logger.addHandler(handler)
-        logger.setLevel(logging.INFO)  
+        logger.setLevel(logging.INFO)
     return logger
 
 
diff --git a/python/main.py b/python/main.py
index 8118c02..2fad9db 100644
--- a/python/main.py
+++ b/python/main.py
@@ -28,7 +28,6 @@ def process_video_and_upload(db_write_function: Optional[Callable[[Dict[str, str
         translated_sentence=audio_generator.translated_sentence,
         image_paths=image_generator.image_paths,
         audio_filepath=audio_generator.audio_path,
-        subtitles_filepath=audio_generator.sub_filepath,
         cloud_storage=True,
     )
 
diff --git a/python/word_generator.py b/python/word_generator.py
index 62d5307..d73e595 100644
--- a/python/word_generator.py
+++ b/python/word_generator.py
@@ -61,7 +61,7 @@ def __init__(self,
         )
         self.audio_duration = None
         self.audio_path = self.text_to_speech(language=self.language_to_learn)
-        self.sub_filepath = self.echogarden_generate_subtitles(sentence=self.sentence)
+        self.sub_filepath = None
 
     @property
     def word_list_path(self):
@@ -463,7 +463,7 @@ def __init__(self,
                  translated_sentence: str,
                  image_paths: List[str],
                  audio_filepath: str,
-                 subtitles_filepath: str,
+                 subtitles_filepath: Optional[str] = None,
                  cloud_storage: bool = False,
                  ):
         """
@@ -473,7 +473,7 @@ def __init__(self,
         :param translated_sentence: the sentence translated to the native language
         :param image_paths: a list of paths to images to use in the video
         :param audio_filepath: the path to the audio file
-        :param subtitles_filepath: the path to the subtitles
+        :param subtitles_filepath: the path to the subtitles file if subtitles have already been generated
         :param cloud_storage: if True generated videos and related content will be stored in S3, if False the content
         will be written locally
         """
@@ -524,7 +524,8 @@ def create_translated_subtitle_clip(
             font_size: int = 50,
             colour: str = "white",
             font: str = "Courier",
-            padding: int = 60
+            padding: int = 60,
+            text_pos: Tuple[str, str] = ("center", "top")
     ) -> CompositeVideoClip:
         """
         Creates a subtitle clip for a translated sentence with dynamically resizing background.
@@ -534,6 +535,7 @@ def create_translated_subtitle_clip(
         :param colour: The colour for the subtitles.
         :param font: The font for the text.
         :param padding: Padding for the subtitle background
+        :param text_pos: Where to place the subtitles
         :return: A CompositeVideoClip containing the timed translated subtitles.
         """
         words = translated_sentence.split()
@@ -552,7 +554,7 @@ def create_translated_subtitle_clip(
                 text=text,
                 font_size=font_size,
                 colour=colour,
-                text_pos=("center", "top"),
+                text_pos=text_pos,
                 font=font,
                 padding=padding
             ).set_start(current_time).set_duration(display_duration)
@@ -563,13 +565,21 @@ def create_translated_subtitle_clip(
         final_subtitle_clip = CompositeVideoClip(subtitle_clips)
         return final_subtitle_clip
 
-    def create_translated_subtitles_file(self, audio_duration: float) -> str:
+    def create_translated_subtitles_file(
+            self,
+            audio_duration: float,
+            words: Optional[str] = None,
+    ) -> str:
         """
         Creates a temporary SRT file for translated subtitles.
         :param audio_duration: Duration of the audio clip
+        :param words: The words to create subtitles for
         :return: Path to the created subtitles file
         """
-        words = self.translated_sentence.split()
+        if words is None:
+            words = self.translated_sentence.split()
+        else:
+            words = words.split()
         word_groups = [words[i:i + 3] for i in range(0, len(words), 3)]
 
         group_count = len(word_groups)
@@ -669,10 +679,10 @@ def generate_video(self, output_filepath: Optional[str] = None, word_font: str =
             audio_file = utils.write_bytes_to_local_temp_file(
                 bytes_object=audio_bytes, suffix=".wav", delete_file=False
             )
-            subtitle_bytes = s3_bucket.get_object_from_s3(self.subtitles_filepath)
-            subtitle_file = utils.write_bytes_to_local_temp_file(
-                bytes_object=subtitle_bytes, suffix=".srt", delete_file=False
-            )
+            # subtitle_bytes = s3_bucket.get_object_from_s3(self.subtitles_filepath)
+            # subtitle_file = utils.write_bytes_to_local_temp_file(
+            #     bytes_object=subtitle_bytes, suffix=".srt", delete_file=False
+            # )
             image_files = []
             for image_file in self.image_paths:
                 image_bytes = s3_bucket.get_object_from_s3(image_file)
@@ -682,7 +692,7 @@ def generate_video(self, output_filepath: Optional[str] = None, word_font: str =
                 image_files.append(image)
         else:
             audio_file = self.audio_filepath
-            subtitle_file = self.subtitles_filepath
+            # subtitle_file = self.subtitles_filepath
             image_files = self.image_paths
 
         audio_clip = AudioFileClip(audio_file)
@@ -698,7 +708,8 @@ def generate_video(self, output_filepath: Optional[str] = None, word_font: str =
             style='bounce'
         )
 
-        subtitles = SubtitlesClip(subtitle_file, self.create_subtitle_clip)
+        native_srt = self.create_translated_subtitles_file(audio_duration=audio_clip.duration, words=self.sentence)
+        subtitles = SubtitlesClip(native_srt, self.create_subtitle_clip)
 
         translated_srt = self.create_translated_subtitles_file(audio_clip.duration)
         translated_subtitles = SubtitlesClip(translated_srt, lambda txt: self.create_subtitle_clip(
@@ -735,7 +746,7 @@ def generate_video(self, output_filepath: Optional[str] = None, word_font: str =
                 s3_path = s3_bucket.push_object_to_s3(temp_video.read(), s3_key)
 
                 utils.remove_temp_file(audio_file)
-                utils.remove_temp_file(subtitle_file)
+                # utils.remove_temp_file(subtitle_file)
                 for tmp_image_to_remove in image_files:
                     utils.remove_temp_file(tmp_image_to_remove)
 

From 3c7171871dfef6e1883fe7fc32a6e696d3df1b95 Mon Sep 17 00:00:00 2001
From: bendsouza <bendsouza2@gmail.com>
Date: Mon, 9 Dec 2024 20:16:15 +0000
Subject: [PATCH 11/14] mypy fixes

---
 python/Dockerfile        |  8 +++++++-
 python/constants.py      |  3 ++-
 python/main.py           |  7 ++++++-
 python/word_generator.py | 33 ++++++++++++++++++---------------
 4 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/python/Dockerfile b/python/Dockerfile
index 65342b3..47bed5d 100644
--- a/python/Dockerfile
+++ b/python/Dockerfile
@@ -20,9 +20,15 @@ RUN yum update -y && \
     aspell-esp \
     aspell-es \
     hunspell-es \
-    make && \
+    make \
+    liberation-sans-fonts \
+    ImageMagick && \
     yum clean all
 
+
+RUN fc-cache -f -v
+RUN fc-list | grep LiberationSans
+
 RUN mkdir -p /usr/share/hunspell && \
     curl -o /usr/share/hunspell/es_ES.dic https://cgit.freedesktop.org/libreoffice/dictionaries/plain/es/es_ES.dic && \
     curl -o /usr/share/hunspell/es_ES.aff https://cgit.freedesktop.org/libreoffice/dictionaries/plain/es/es_ES.aff
diff --git a/python/constants.py b/python/constants.py
index 9cd1850..75021e2 100644
--- a/python/constants.py
+++ b/python/constants.py
@@ -99,4 +99,5 @@ class Paths:
     VIDEO_DIR_PATH = "video"
     GOOGLE_CREDS_PATH = "google_creds.json"
     YT_TOKEN_PATH = "python/token.json"
-    PYTHON_ENV_FILE = ".env" 
+    PYTHON_ENV_FILE = ".env"
+    FONT_PATH = "/usr/share/fonts/liberation/LiberationSans-Regular.ttf"
diff --git a/python/main.py b/python/main.py
index 2fad9db..758d4e7 100644
--- a/python/main.py
+++ b/python/main.py
@@ -22,12 +22,17 @@ def process_video_and_upload(db_write_function: Optional[Callable[[Dict[str, str
     prompt = Prompts.IMAGE_GENERATOR + audio_generator.sentence
     image_generator = ImageGenerator(prompts=prompt, cloud_storage=True)
 
+    if audio_generator.cloud_storage is True:
+        audio_file = audio_generator.audio_cloud_path
+    else:
+        audio_file = audio_generator.audio_path
+
     video_generator = VideoGenerator(
         word=audio_generator.word,
         sentence=audio_generator.sentence,
         translated_sentence=audio_generator.translated_sentence,
         image_paths=image_generator.image_paths,
-        audio_filepath=audio_generator.audio_path,
+        audio_filepath=audio_file,
         cloud_storage=True,
     )
 
diff --git a/python/word_generator.py b/python/word_generator.py
index d73e595..14f0aa3 100644
--- a/python/word_generator.py
+++ b/python/word_generator.py
@@ -59,8 +59,8 @@ def __init__(self,
         self.translated_sentence = self.google_translate(
             source_language=self.language_to_learn, target_language=self.native_language
         )
-        self.audio_duration = None
-        self.audio_path = self.text_to_speech(language=self.language_to_learn)
+        self.audio_duration: Optional[float] = None
+        self.audio_path, self.audio_cloud_path = self.text_to_speech(language=self.language_to_learn)
         self.sub_filepath = None
 
     @property
@@ -74,7 +74,7 @@ def word_list_path(self, word_list_path):
         else:
             self._word_list_path = f"{base_config.BASE_DIR}/{word_list_path}"
 
-    def text_to_speech(self, language: str, filepath: Optional[str] = None) -> str:
+    def text_to_speech(self, language: str, filepath: Optional[str] = None) -> Tuple[str | None, str | None]:
         """
         Generate an audio file
         :param language: The language that the audio should be generated in
@@ -95,9 +95,11 @@ def text_to_speech(self, language: str, filepath: Optional[str] = None) -> str:
             s3_key = f"{Paths.AUDIO_DIR_PATH}/{dt}.wav"
             s3_bucket = BucketSort(bucket=BUCKET_NAME)
             s3_path = s3_bucket.push_object_to_s3(audio_buffer.read(), s3_key)
+        else:
+            s3_path = None
 
         tts.save(filepath)
-        return filepath
+        return filepath, s3_path
 
     def get_audio_duration(self) -> float:
         """
@@ -186,14 +188,14 @@ def echogarden_generate_subtitles(self, sentence: str) -> str:
         file_to_execute = f"{base_config.BASE_DIR}/{Paths.NODE_SUBS_FILE_PATH}"
         for log_path in [file_to_execute, self.audio_path]:
             no_path = []
-            if not os.path.exists(log_path):
+            if log_path is not None and not os.path.exists(log_path):
                 no_path.append(log_path)
         if len(no_path) > 0:
             raise FileNotFoundError(f"paths {no_path} do not exist")
 
         command = ["node", file_to_execute, self.audio_path, sentence, output_file_path]
         try:
-            result = subprocess.run(command, check=True, capture_output=True, text=True)
+            result = subprocess.run(command, check=True, capture_output=True, text=True)    # type: ignore[arg-type]
         except subprocess.CalledProcessError as e:
             raise subprocess.CalledProcessError(
                 e.returncode, e.cmd, stderr=f"Command failed with exit code {e.returncode}. stderr {e.stderr}"
@@ -492,7 +494,7 @@ def create_subtitle_clip(
             colour: str = "white",
             background_opacity: float = 0.7,
             text_pos: Tuple[str, str] | Tuple[int, int] | Tuple[float, float] = ("center", "center"),
-            font: str = "Courier",
+            font: str = Paths.FONT_PATH,
             padding: int = 60
     ) -> CompositeVideoClip:
         """
@@ -523,7 +525,7 @@ def create_translated_subtitle_clip(
             audio_duration: float,
             font_size: int = 50,
             colour: str = "white",
-            font: str = "Courier",
+            font: str = Paths.FONT_PATH,
             padding: int = 60,
             text_pos: Tuple[str, str] = ("center", "top")
     ) -> CompositeVideoClip:
@@ -577,10 +579,10 @@ def create_translated_subtitles_file(
         :return: Path to the created subtitles file
         """
         if words is None:
-            words = self.translated_sentence.split()
+            words_list = self.translated_sentence.split()
         else:
-            words = words.split()
-        word_groups = [words[i:i + 3] for i in range(0, len(words), 3)]
+            words_list = words.split()
+        word_groups = [words_list[i:i + 3] for i in range(0, len(words_list), 3)]
 
         group_count = len(word_groups)
         display_duration = audio_duration / group_count
@@ -605,7 +607,7 @@ def create_translated_subtitles_file(
     def create_fancy_word_clip(
             word: str,
             font_size: int = 80,
-            font: str = "Toppan-Bunkyu-Gothic-Demibold",
+            font: str = Paths.FONT_PATH,
             duration: float = 1.0,
             stroke_colour: str = "green",
             style: str = "bounce"
@@ -662,7 +664,7 @@ def create_fancy_word_clip(
 
         return final_clip
 
-    def generate_video(self, output_filepath: Optional[str] = None, word_font: str = "Courier") -> str:
+    def generate_video(self, output_filepath: Optional[str] = None, word_font: str = Paths.FONT_PATH) -> str:
         """
         Combine audio, images, word overlay and subtitles to generate and save a video
         :param output_filepath: the absolute path to store the generated video
@@ -732,12 +734,13 @@ def generate_video(self, output_filepath: Optional[str] = None, word_font: str =
 
         s3_path = None
         if self.cloud_storage is True:
-            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=True) as temp_video:
+            with tempfile.NamedTemporaryFile(suffix=".mp4", dir="/tmp", delete=True) as temp_video:
                 final_video.write_videofile(
                     temp_video.name,
                     fps=24,
                     codec="libx264",
-                    audio_codec="aac"
+                    audio_codec="aac",
+                    temp_audiofile=f"/tmp/temp_audiofile.m4a",
                 )
                 temp_video.seek(0)
 

From f2abd8f7b44a0bf0acc00acb57aeedf735f66cbf Mon Sep 17 00:00:00 2001
From: bendsouza <bendsouza2@gmail.com>
Date: Mon, 9 Dec 2024 20:18:24 +0000
Subject: [PATCH 12/14] add requirements for lambda function

---
 lambda-requirements.txt | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 lambda-requirements.txt

diff --git a/lambda-requirements.txt b/lambda-requirements.txt
new file mode 100644
index 0000000..4dd4aae
--- /dev/null
+++ b/lambda-requirements.txt
@@ -0,0 +1,24 @@
+httpx==0.27.2
+google-api-python-client==2.94.0
+google-api-core==2.11.1
+openai==1.7.2
+black==23.12.1
+requests~=2.31.0
+deep-translator==1.11.4
+gTTS~=2.5.1
+moviepy~=1.0.3
+scipy==1.12.0
+numpy==1.26.3
+soundfile==0.12.1
+boto3==1.35.7
+fastapi==0.114.0
+pydantic~=2.5.3
+botocore==1.35.7
+pyenchant==3.2.2
+mypy==1.11.2
+python-dotenv==1.0.1
+google-auth==2.22.0
+google-auth-httplib2==0.1.0
+google-auth-oauthlib==1.2.1
+pillow==10.2.0
+mysqlclient==2.1.1
\ No newline at end of file

From 24fdfbb89db549509f264bcaa55c2db5434996fb Mon Sep 17 00:00:00 2001
From: bendsouza <bendsouza2@gmail.com>
Date: Mon, 9 Dec 2024 20:24:35 +0000
Subject: [PATCH 13/14] fix pytest by adding mock return value

---
 python/tests/test_functions.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py
index f33612a..24bbfa9 100644
--- a/python/tests/test_functions.py
+++ b/python/tests/test_functions.py
@@ -26,6 +26,7 @@ def setUpClass(cls):
         cls.mock_get_audio_duration = patch("python.word_generator.Audio.get_audio_duration").start()
         cls.mock_generate_srt_file = patch("python.word_generator.Audio.echogarden_generate_subtitles").start()
 
+        cls.mock_tts.return_value = ("local_path", "cloud_path")
         cls.mock_google_translator.return_value.translate.return_value = "Translated sentence"
         cls.audio = Audio(
             word_list_path="python/tests/test_word_list.txt",

From 2271be5ee02c19528a022eb7bc8681beb3a52bd0 Mon Sep 17 00:00:00 2001
From: bendsouza <bendsouza2@gmail.com>
Date: Mon, 9 Dec 2024 20:29:20 +0000
Subject: [PATCH 14/14] raise TypeError when audio_file is None

---
 python/main.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/main.py b/python/main.py
index 758d4e7..21d02a1 100644
--- a/python/main.py
+++ b/python/main.py
@@ -27,6 +27,8 @@ def process_video_and_upload(db_write_function: Optional[Callable[[Dict[str, str
     else:
         audio_file = audio_generator.audio_path
 
+    if audio_file is None:
+        raise TypeError(f"audio_file must be a string")
     video_generator = VideoGenerator(
         word=audio_generator.word,
         sentence=audio_generator.sentence,