Merge pull request #39 from bendsouza2/bug/docker-lambda-entry

fix lambda and docker integration
bendsouza2 · Dec 9, 2024 · 0a1ed07 · 0a1ed07
2 parents 329a394 + 2271be5
commit 0a1ed07
Show file tree

Hide file tree

Showing 10 changed files with 257 additions and 41 deletions.
diff --git a/lambda-requirements.txt b/lambda-requirements.txt
@@ -0,0 +1,24 @@
+# Pinned Python dependencies for the container image built from node/Dockerfile,
+# which copies this file to /var/task/requirements.txt and pip-installs it.
+# NOTE(review): black and mypy look like development-only tools; confirm they
+# are needed inside the runtime image.
+httpx==0.27.2
+google-api-python-client==2.94.0
+google-api-core==2.11.1
+openai==1.7.2
+black==23.12.1
+requests~=2.31.0
+deep-translator==1.11.4
+gTTS~=2.5.1
+moviepy~=1.0.3
+scipy==1.12.0
+numpy==1.26.3
+soundfile==0.12.1
+boto3==1.35.7
+fastapi==0.114.0
+pydantic~=2.5.3
+botocore==1.35.7
+pyenchant==3.2.2
+mypy==1.11.2
+python-dotenv==1.0.1
+google-auth==2.22.0
+google-auth-httplib2==0.1.0
+google-auth-oauthlib==1.2.1
+pillow==10.2.0
+mysqlclient==2.1.1
diff --git a/node/Dockerfile b/node/Dockerfile
@@ -0,0 +1,53 @@
+FROM python:3.10-slim
+
+WORKDIR /var/task
+
+# System dependencies: compiler toolchain, MariaDB client library headers,
+# libsndfile, ffmpeg, and Enchant with Spanish aspell/hunspell dictionaries.
+# NOTE(review): the base image already provides Python 3.10 and pip; confirm
+# the apt python3* packages below are still required.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    wget \
+    curl \
+    gnupg \
+    gcc \
+    g++ \
+    make \
+    python3 \
+    python3-dev \
+    python3-pip \
+    python3-venv \
+    mariadb-client \
+    libmariadb-dev \
+    libsndfile1 \
+    ffmpeg \
+    libenchant-2-2 \
+    aspell-es \
+    hunspell-es && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install Node.js 22.x from NodeSource, then upgrade npm.
+# Installing nodejs repopulates the apt caches, so clear them again to keep
+# this layer small.
+RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \
+    apt-get install -y nodejs && \
+    npm install -g npm@latest && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Set Enchant configuration paths
+ENV ENCHANT_CONFIG_DIR=/usr/share/hunspell
+ENV ENCHANT_DATA_DIR=/usr/share/hunspell
+
+# Spanish dictionaries.
+# -f makes curl exit non-zero on an HTTP error instead of saving the error
+# page as a dictionary file; -L follows redirects; -sS hides the progress
+# meter but still prints errors.
+RUN mkdir -p /usr/share/hunspell && \
+    curl -fsSL -o /usr/share/hunspell/es_ES.dic https://cgit.freedesktop.org/libreoffice/dictionaries/plain/es/es_ES.dic && \
+    curl -fsSL -o /usr/share/hunspell/es_ES.aff https://cgit.freedesktop.org/libreoffice/dictionaries/plain/es/es_ES.aff
+
+# Node.js dependencies (package.json is copied first so this layer is cached
+# until the manifest changes)
+COPY node/package.json /var/task/node/
+RUN cd /var/task/node && npm install
+
+# Python dependencies
+COPY lambda-requirements.txt /var/task/requirements.txt
+RUN pip3 install --no-cache-dir -r /var/task/requirements.txt
+
+COPY . /var/task
+
+# Runs python/lambda_handler.py as a script.
+# NOTE(review): confirm the module actually invokes lambda_handler() when
+# executed with -m; the function itself expects (event, context) arguments.
+CMD ["python3", "-m", "python.lambda_handler"]
+
diff --git a/python/Dockerfile b/python/Dockerfile
@@ -2,16 +2,49 @@ FROM public.ecr.aws/lambda/python:3.10-arm64
 
 WORKDIR /var/task
 
-# Install system dependencies
+# Install Node.js 16.x from NodeSource.
+# NOTE(review): node/Dockerfile installs Node.js 22; confirm 16.x is
+# intentional here (presumably constrained by the Lambda base image).
+RUN curl -fsSL https://rpm.nodesource.com/setup_16.x | bash - && \
+    yum install -y nodejs
+
+# Install system-level dependencies
+# NOTE(review): aspell-esp is listed next to aspell-es; confirm that package
+# name exists in the base image's repositories.
 RUN yum update -y && \
-    yum install -y wget gnupg gcc python3-devel mysql-devel mariadb-devel libsndfile ffmpeg && \
+    yum install -y \
+    wget \
+    gnupg \
+    gcc \
+    python3-devel \
+    mysql-devel \
+    mariadb-devel \
+    libsndfile \
+    ffmpeg \
+    enchant-devel \
+    aspell-esp \
+    aspell-es \
+    hunspell-es \
+    make \
+    liberation-sans-fonts \
+    ImageMagick && \
     yum clean all
 
+
+# Rebuild the font cache, then fail the build early if Liberation Sans is not
+# installed (grep exits non-zero when nothing matches).
+RUN fc-cache -f -v
+RUN fc-list | grep LiberationSans
+
+# Spanish dictionaries.
+# -f makes curl exit non-zero on an HTTP error instead of saving the error
+# page as a dictionary file; -L follows redirects; -sS hides the progress
+# meter but still prints errors.
+RUN mkdir -p /usr/share/hunspell && \
+    curl -fsSL -o /usr/share/hunspell/es_ES.dic https://cgit.freedesktop.org/libreoffice/dictionaries/plain/es/es_ES.dic && \
+    curl -fsSL -o /usr/share/hunspell/es_ES.aff https://cgit.freedesktop.org/libreoffice/dictionaries/plain/es/es_ES.aff
+
+ENV ENCHANT_CONFIG_DIR=/usr/share/hunspell
+ENV ENCHANT_DATA_DIR=/usr/share/hunspell
+
+# node.js dependencies
+COPY node/package.json /var/task/node/
+RUN cd /var/task/node && npm install
+
+
 # Install Python dependencies
 COPY requirements.txt /var/task/requirements.txt
 RUN pip install --no-cache-dir -r /var/task/requirements.txt
 
-# Copy application code
 COPY . /var/task
 
-CMD ["lambda_handler.lambda_handler"]
+# Handler in module.function form: python/lambda_handler.py -> lambda_handler
+CMD ["python.lambda_handler.lambda_handler"]
diff --git a/python/constants.py b/python/constants.py
@@ -99,4 +99,5 @@ class Paths:
     VIDEO_DIR_PATH = "video"
     GOOGLE_CREDS_PATH = "google_creds.json"
     YT_TOKEN_PATH = "python/token.json"
-    PYTHON_ENV_FILE = ".env" 
+    PYTHON_ENV_FILE = ".env"
+    FONT_PATH = "/usr/share/fonts/liberation/LiberationSans-Regular.ttf"
diff --git a/python/custom_logging.py b/python/custom_logging.py
@@ -0,0 +1,59 @@
+from typing import Callable, Type
+import logging
+from functools import wraps
+
+# Module-level logger; log_execution writes its Entering/Exiting messages to it.
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+def get_logger(module_name: str) -> logging.Logger:
+    """
+    Return the logger for ``module_name``, configuring it on first use.
+
+    A stream handler with a timestamped format is attached only when neither
+    the logger nor any of its ancestors already has a handler, so records are
+    not emitted twice when a parent (for example the root logger) is already
+    configured. In that pre-configured case the logger still receives the
+    INFO level unless a level was set on it explicitly; otherwise it would
+    inherit the parent's level and INFO records could be dropped.
+
+    Args:
+        module_name (str): Name of the module requesting the logger.
+
+    Returns:
+        logging.Logger: Configured logger instance.
+    """
+    # Distinct local name so the module-level ``logger`` is not shadowed.
+    module_logger = logging.getLogger(module_name)
+    if not module_logger.hasHandlers():
+        handler = logging.StreamHandler()
+        formatter = logging.Formatter(
+            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        )
+        handler.setFormatter(formatter)
+        module_logger.addHandler(handler)
+        module_logger.setLevel(logging.INFO)
+    elif module_logger.level == logging.NOTSET:
+        # hasHandlers() also reports ancestor handlers; apply the default
+        # level here too, without overriding a level the caller chose.
+        module_logger.setLevel(logging.INFO)
+    return module_logger
+
+
+def log_execution(func: Callable) -> Callable:
+    """
+    Decorator that brackets every call to ``func`` with INFO log lines.
+
+    "Entering: <qualname>" is logged before the call and "Exiting: <qualname>"
+    after it returns. If ``func`` raises, the exception propagates and no
+    exit line is written. The wrapper keeps ``func``'s metadata via
+    ``functools.wraps``.
+    """
+    def logged_call(*positional, **named):
+        logger.info(f"Entering: {func.__qualname__}")
+        outcome = func(*positional, **named)
+        logger.info(f"Exiting: {func.__qualname__}")
+        return outcome
+
+    return wraps(func)(logged_call)
+
+
+def log_all_methods(cls: Type) -> Type:
+    """
+    Class decorator that wraps the class's own callables with log_execution.
+
+    Only attributes defined directly on ``cls`` are touched; inherited
+    members are left alone. Handling per attribute kind:
+
+    * ``property``: getter, setter and deleter are each wrapped when present,
+      and the property's docstring is carried over.
+    * ``staticmethod`` / ``classmethod``: the underlying function is wrapped
+      and re-wrapped in the same descriptor type. These are checked before
+      ``callable()`` because classmethod objects are not callable themselves.
+    * other callables (plain methods, including dunder methods): wrapped
+      directly. Nested classes are skipped so they remain usable as classes.
+
+    Args:
+        cls (Type): The class to instrument; it is modified in place.
+
+    Returns:
+        Type: The same class object, for use as a decorator.
+    """
+    def _wrap_optional(accessor):
+        # Property accessors may be None (e.g. a read-only property).
+        return log_execution(accessor) if accessor is not None else None
+
+    # Iterate over a snapshot: setattr below rebinds entries of cls.__dict__.
+    for attr_name, attr_value in list(cls.__dict__.items()):
+        if isinstance(attr_value, property):
+            replacement = property(
+                _wrap_optional(attr_value.fget),
+                _wrap_optional(attr_value.fset),
+                _wrap_optional(attr_value.fdel),
+                attr_value.__doc__,
+            )
+        elif isinstance(attr_value, staticmethod):
+            replacement = staticmethod(log_execution(attr_value.__func__))
+        elif isinstance(attr_value, classmethod):
+            replacement = classmethod(log_execution(attr_value.__func__))
+        elif callable(attr_value) and not isinstance(attr_value, type):
+            replacement = log_execution(attr_value)
+        else:
+            continue
+        setattr(cls, attr_name, replacement)
+    return cls
+
+
+
diff --git a/python/lambda_handler.py b/python/lambda_handler.py
@@ -1,6 +1,7 @@
 import os
 import logging
 import traceback
+from typing import Dict, Any
 
 import MySQLdb
 
@@ -11,9 +12,13 @@
 logger.setLevel(logging.INFO)
 
 
-def lambda_handler():
+def lambda_handler(event: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
     """
     Lambda entry point to process video, upload to YouTube, and write metadata to db.
+    :param event: The event data passed to the lambda function
+    :param context: The context object providing runtime information about the Lambda execution, such as the function
+    name, request ID and remaining execution time.
+    :returns: A dictionary with a `statusCode` and `body` containing the result of the Lambda execution.
     """
     try:
         required_env_vars = ["DB_HOST", "DB_USER", "DB_PASSWORD", "DB_NAME"]

diff --git a/python/main.py b/python/main.py
@@ -22,13 +22,19 @@ def process_video_and_upload(db_write_function: Optional[Callable[[Dict[str, str
     prompt = Prompts.IMAGE_GENERATOR + audio_generator.sentence
     image_generator = ImageGenerator(prompts=prompt, cloud_storage=True)
 
+    if audio_generator.cloud_storage is True:
+        audio_file = audio_generator.audio_cloud_path
+    else:
+        audio_file = audio_generator.audio_path
+
+    if audio_file is None:
+        raise TypeError(f"audio_file must be a string")
     video_generator = VideoGenerator(
         word=audio_generator.word,
         sentence=audio_generator.sentence,
         translated_sentence=audio_generator.translated_sentence,
         image_paths=image_generator.image_paths,
-        audio_filepath=audio_generator.audio_path,
-        subtitles_filepath=audio_generator.sub_filepath,
+        audio_filepath=audio_file,
         cloud_storage=True,
     )
 

diff --git a/python/s3_organiser.py b/python/s3_organiser.py
@@ -8,10 +8,13 @@
 from botocore.exceptions import ClientError
 
 from python import utils
+from python import custom_logging
 
 dotenv.load_dotenv()
 
-if (public_key := os.getenv("AWS_PUBLIC_KEY") is not None) and (
+if utils.is_running_on_aws() is True:
+    session = boto3.Session()
+elif (public_key := os.getenv("AWS_PUBLIC_KEY") is not None) and (
         secret_key := os.getenv("AWS_SECRET_KEY") is not None):
     session = boto3.Session(
         aws_access_key_id=public_key,
@@ -23,6 +26,7 @@
     )
 
 
+@custom_logging.log_all_methods
 class BucketSort:
     """
     Class for reading and writing to S3

diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py
@@ -26,6 +26,7 @@ def setUpClass(cls):
         cls.mock_get_audio_duration = patch("python.word_generator.Audio.get_audio_duration").start()
         cls.mock_generate_srt_file = patch("python.word_generator.Audio.echogarden_generate_subtitles").start()
 
+        cls.mock_tts.return_value = ("local_path", "cloud_path")
         cls.mock_google_translator.return_value.translate.return_value = "Translated sentence"
         cls.audio = Audio(
             word_list_path="python/tests/test_word_list.txt",