Merge pull request #35 from bendsouza2/feature/lambda-handler

bendsouza2 · web-flow · commit dfcb34efc021 · 2024-12-01T20:42:29.000Z
Feature/lambda handler
diff --git a/.github/workflows/python-unittest.yml b/.github/workflows/python-unittest.yml
@@ -36,4 +36,4 @@ jobs:
         pytest -vvv
     - name: type check with mypy
       run: |
-        mypy -v .
+        mypy .
diff --git a/python/Dockerfile b/python/Dockerfile
@@ -0,0 +1,17 @@
+# Container image for the Lambda function whose handler is lambda_handler.lambda_handler.
+# Built on the Lambda Python 3.10 base image for arm64 from the public ECR gallery.
+FROM public.ecr.aws/lambda/python:3.10-arm64
+
+WORKDIR /var/task
+
+# Install system dependencies
+# NOTE(review): gcc, python3-devel and mysql-devel/mariadb-devel are presumably required to compile
+# mysqlclient; libsndfile and ffmpeg presumably serve the audio/video generation - confirm.
+# NOTE(review): verify that every package (ffmpeg in particular) is available from the yum
+# repositories enabled in this base image, otherwise this layer fails to build.
+RUN yum update -y && \
+    yum install -y wget gnupg gcc python3-devel mysql-devel mariadb-devel libsndfile ffmpeg && \
+    yum clean all
+
+# Install Python dependencies
+# requirements.txt is copied on its own first so this layer is reused when only application code changes.
+COPY requirements.txt /var/task/requirements.txt
+RUN pip install --no-cache-dir -r /var/task/requirements.txt
+
+# Copy application code
+# NOTE(review): the handler below is resolved relative to /var/task, while lambda_handler.py imports
+# `python.main` and `python.db_handler`; confirm the build context places both the handler module and
+# the `python` package where those paths expect them.
+COPY . /var/task
+
+# Handler to invoke, in <module>.<function> form
+CMD ["lambda_handler.lambda_handler"]
diff --git a/python/db_handler.py b/python/db_handler.py
@@ -0,0 +1,56 @@
+import os
+from typing import Dict
+
+import MySQLdb
+
+
+def write_to_db(video_details: Dict[str, str]) -> None:
+    """
+    Upsert a single video's metadata into the `videos` table using mysqlclient (MySQLdb).
+
+    Connection settings come from the DB_HOST, DB_USER, DB_PASSWORD and DB_NAME environment variables.
+    When the insert hits a duplicate key, the existing row's other columns are overwritten with the new
+    values. Database errors are printed and re-raised; the connection is closed in every case.
+    :param video_details: Dictionary containing the data to write to the DB. It must hold a value for every
+    column listed in the INSERT statement.
+    """
+    # Order matters: it has to line up with the column list and placeholders in the statement below.
+    columns = (
+        "video_id",
+        "word",
+        "sentence",
+        "translated_sentence",
+        "title",
+        "description",
+        "upload_time",
+        "thumbnail_url",
+    )
+    conn = None
+    try:
+        upsert_sql = """
+        INSERT INTO videos (video_id, word, sentence, translated_sentence, title, description, upload_time, thumbnail_url)
+        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
+        ON DUPLICATE KEY UPDATE
+            word = VALUES(word),
+            sentence = VALUES(sentence),
+            translated_sentence = VALUES(translated_sentence),
+            title = VALUES(title),
+            description = VALUES(description),
+            upload_time = VALUES(upload_time),
+            thumbnail_url = VALUES(thumbnail_url);
+        """
+
+        conn = MySQLdb.connect(
+            host=os.getenv("DB_HOST"),
+            user=os.getenv("DB_USER"),
+            passwd=os.getenv("DB_PASSWORD"),
+            db=os.getenv("DB_NAME"),
+        )
+        db_cursor = conn.cursor()
+
+        # Values are passed separately from the SQL text so the driver handles quoting.
+        row_values = tuple(video_details[column] for column in columns)
+        db_cursor.execute(upsert_sql, row_values)
+
+        conn.commit()
+
+    except MySQLdb.Error as db_error:
+        print(f"Error while interacting with the database: {db_error}")
+        raise
+
+    finally:
+        # `conn` is still None when connect() itself failed, so there is nothing to close.
+        if conn is not None:
+            conn.close()
diff --git a/python/lambda_handler.py b/python/lambda_handler.py
@@ -0,0 +1,63 @@
+import os
+import logging
+import traceback
+
+import MySQLdb
+
+from python.main import process_video_and_upload
+from python.db_handler import write_to_db
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+
+def _error_response(log_label, status_code, message):
+    """
+    Log the exception currently being handled and build the matching error payload.
+    Must be called from inside an `except` block, otherwise no traceback is available.
+    :param log_label: Prefix for the log line, e.g. "MySQL Error"
+    :param status_code: Status code reported in the response
+    :param message: Summary placed in the response body
+    :return: Dictionary with `statusCode` and a `body` holding the message and the traceback text
+    """
+    # Format once so the logged traceback and the returned one are guaranteed to match.
+    trace = traceback.format_exc()
+    logger.error(f"{log_label}: {trace}")
+    return {
+        "statusCode": status_code,
+        "body": {
+            "message": message,
+            "error": trace,
+        },
+    }
+
+
+def lambda_handler(event=None, context=None):
+    """
+    Lambda entry point to process video, upload to YouTube, and write metadata to db.
+
+    The Lambda runtime invokes the handler with two positional arguments, so both must be accepted even
+    though neither is used. They default to None so the function can still be called with no arguments.
+    :param event: Invocation payload supplied by the Lambda runtime (unused)
+    :param context: Runtime context object supplied by the Lambda runtime (unused)
+    :return: Dictionary with `statusCode` 200 and the video details on success, or `statusCode` 400/500 with
+    a message and traceback when an environment, value or database error is caught. Any other exception
+    propagates to the caller.
+    """
+    try:
+        required_env_vars = ["DB_HOST", "DB_USER", "DB_PASSWORD", "DB_NAME"]
+        missing_vars = [var for var in required_env_vars if not os.getenv(var)]
+        if missing_vars:
+            raise EnvironmentError(f"Missing required environment variables: {', '.join(missing_vars)}")
+
+        video_details = process_video_and_upload(db_write_function=write_to_db)
+
+    except EnvironmentError:
+        # NOTE: EnvironmentError is an alias of OSError, so OS-level I/O failures raised while processing
+        # are reported through this branch as well.
+        return _error_response("Environment Error", 400, "Missing required environment variables")
+
+    except MySQLdb.Error:
+        return _error_response("MySQL Error", 500, "Database error occurred while processing video")
+
+    except ValueError:
+        return _error_response("Value Error", 400, "Invalid data error occurred during processing")
+
+    return {
+        "statusCode": 200,
+        "body": {
+            "message": "Video processed and uploaded successfully",
+            "video_details": video_details,
+        },
+    }
+
diff --git a/python/language_verification.py b/python/language_verification.py
@@ -4,7 +4,6 @@
 from typing import Dict
 
 import requests
-import spacy
 import enchant
 
 from python.constants import URLs
@@ -43,21 +42,6 @@ def lexical_test_real_word(self, word: str) -> bool:
         else:
             return True
 
-    def spacy_real_word(self, model: str, word: str) -> bool:
-        """
-        Test a word is real using Spacy. For more info see https://spacy.io/
-        :param model: The model to use to help identify the word
-        :param word: The word to test
-        :return: True if the word exists for the given language, False if not
-        """
-
-        if model is None:
-            model = f"{self.language}_core_news_sm"
-
-        language = spacy.load(model)
-        doc = language(word)
-        return doc[0].is_alpha and not doc[0].is_stop
-
     def enchant_real_word(self, word: str) -> bool:
         """
         Test a word is real using enchant. For more info see https://pyenchant.github.io/pyenchant/install.html
diff --git a/python/main.py b/python/main.py
@@ -1,16 +1,16 @@
-from typing import Dict
+from typing import Dict, Callable, Optional
 from datetime import datetime
 
 from python.word_generator import Audio, ImageGenerator, VideoGenerator
 from python.yt_uploader import YTConnector
 from python.constants import Paths, LANGUAGE_TO_LEARN, NATIVE_LANGUAGE, Prompts
 
 
-def process_video_and_upload() -> Dict[str, str]:
+def process_video_and_upload(db_write_function: Optional[Callable[[Dict[str, str]], None]] = None) -> Dict[str, str]:
     """
-    Combines the main functionality of the project to generate audio and video for a random word
-    :return: a dictionary with the ID of the uploaded video, and the word, sentence and translated sentence that the
-    video is based on
+    Combines the main functionality to generate audio and video for a random word and upload it to YouTube.
+    Optionally writes metadata to a database using `db_write_function`.
+    :param db_write_function: Write video metadata to a RDB
     """
     audio_generator = Audio(
         word_list_path=Paths.WORD_LIST_PATH,
@@ -19,16 +19,8 @@ def process_video_and_upload() -> Dict[str, str]:
         cloud_storage=True,
     )
 
-    print(audio_generator.word)
-    print(audio_generator.sentence)
-    print(audio_generator.translated_sentence)
-
     prompt = Prompts.IMAGE_GENERATOR + audio_generator.sentence
-
-    image_generator = ImageGenerator(
-        prompts=prompt,
-        cloud_storage=True,
-    )
+    image_generator = ImageGenerator(prompts=prompt, cloud_storage=True)
 
     video_generator = VideoGenerator(
         word=audio_generator.word,
@@ -43,22 +35,17 @@ def process_video_and_upload() -> Dict[str, str]:
     video_filepath = video_generator.generate_video()
     video_metadata = video_generator.generate_video_metadata(language_code=LANGUAGE_TO_LEARN)
 
-    yt = YTConnector(
-        credentials_env=True,
-        cloud_storage=True,
-    )
+    yt = YTConnector(credentials_env=True, cloud_storage=True)
     upload_details = yt.upload_youtube_short(
         video_path=video_filepath,
-        title=video_metadata["title"],  # type: ignore[arg-type]
-        description=video_metadata["description"],  # type: ignore[arg-type]
-        tags=video_metadata["tags"]
+        title=str(video_metadata["title"]),
+        description=str(video_metadata["description"]),
+        tags=video_metadata["tags"],
     )
 
     video_id = upload_details["id"]
-    thumbnail_url = upload_details["snippet"]["thumbnails"]["default"]["url"]
     upload_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
 
-    print(video_id)
     response = {
         "video_id": video_id,
         "word": audio_generator.word,
@@ -67,6 +54,10 @@ def process_video_and_upload() -> Dict[str, str]:
         "title": video_metadata["title"],
         "description": video_metadata["description"],
         "upload_time": upload_time,
-        "thumbnail_url": thumbnail_url
+        "thumbnail_url": upload_details["snippet"]["thumbnails"]["default"]["url"],
     }
+
+    # The argument is an optional callable, and a callable is never the singleton `True`,
+    # so the check has to be against None for the write to actually happen.
+    if db_write_function is not None:
+        db_write_function(response)
+
     return response
diff --git a/requirements.txt b/requirements.txt
@@ -3,7 +3,6 @@ google-api-core==2.11.1
 openai==1.7.2
 black==23.12.1
 requests~=2.31.0
-types-requests==2.32.0.20240914
 deep-translator==1.11.4
 gTTS~=2.5.1
 moviepy~=1.0.3
@@ -15,12 +14,12 @@ fastapi==0.114.0
 pydantic~=2.5.3
 botocore==1.35.7
 pyenchant==3.2.2
-spacy==3.7.6
 mypy==1.11.2
 python-dotenv==1.0.1
 google-auth==2.22.0
 google-auth-httplib2==0.1.0
 google-auth-oauthlib==1.2.1
 Django==5.1.3
 djangorestframework==3.15.2
-pillow==10.2.0
+pillow==10.2.0
+mysqlclient==2.1.1