diff --git a/python/Dockerfile b/python/Dockerfile
index b5aefa4..9a2b7a3 100644
--- a/python/Dockerfile
+++ b/python/Dockerfile
@@ -2,7 +2,10 @@ FROM public.ecr.aws/lambda/python:3.10-arm64
 
 WORKDIR /var/task
 
-# Install system dependencies
+RUN curl -fsSL https://rpm.nodesource.com/setup_16.x | bash - && \
+    yum install -y nodejs
+
+# Install system-level dependencies
 RUN yum update -y && \
     yum install -y \
     wget \
@@ -13,10 +16,25 @@ RUN yum update -y && \
     mariadb-devel \
     libsndfile \
     ffmpeg \
-    enchant-devel \
+    enchant-devel \
+    aspell-esp \
+    aspell-es \
+    hunspell-es \
     make && \
     yum clean all
 
+RUN mkdir -p /usr/share/hunspell && \
+    curl -o /usr/share/hunspell/es_ES.dic https://cgit.freedesktop.org/libreoffice/dictionaries/plain/es/es_ES.dic && \
+    curl -o /usr/share/hunspell/es_ES.aff https://cgit.freedesktop.org/libreoffice/dictionaries/plain/es/es_ES.aff
+
+ENV ENCHANT_CONFIG_DIR=/usr/share/hunspell
+ENV ENCHANT_DATA_DIR=/usr/share/hunspell
+
+# node.js dependencies
+COPY node/package.json /var/task/node/
+RUN cd /var/task/node && npm install
+
+
 # Install Python dependencies
 COPY requirements.txt /var/task/requirements.txt
 RUN pip install --no-cache-dir -r /var/task/requirements.txt
diff --git a/python/custom_logging.py b/python/custom_logging.py
new file mode 100644
index 0000000..0d6889a
--- /dev/null
+++ b/python/custom_logging.py
@@ -0,0 +1,59 @@
+from typing import Callable, Type
+import logging
+from functools import wraps
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+def get_logger(module_name: str) -> logging.Logger:
+    """
+    Creates and configures a logger for the given module.
+
+    Args:
+        module_name (str): Name of the module requesting the logger.
+
+    Returns:
+        logging.Logger: Configured logger instance.
+    """
+    logger = logging.getLogger(module_name)
+    if not logger.hasHandlers():  # Prevents adding handlers multiple times if reused
+        handler = logging.StreamHandler()  # Logs to stderr by default
+        formatter = logging.Formatter(
+            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        )
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+        logger.setLevel(logging.INFO)  # Set your desired logging level here
+    return logger
+
+
+def log_execution(func: Callable) -> Callable:
+    """Decorator to log the execution of a function or method."""
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        logger.info(f"Entering: {func.__qualname__}")
+        result = func(*args, **kwargs)
+        logger.info(f"Exiting: {func.__qualname__}")
+        return result
+    return wrapper
+
+
+def log_all_methods(cls: Type):
+    """Class decorator to log all method calls in a class."""
+    for attr_name, attr_value in cls.__dict__.items():
+        if isinstance(attr_value, property):
+            getter = log_execution(attr_value.fget) if attr_value.fget else None
+            setter = log_execution(attr_value.fset) if attr_value.fset else None
+            setattr(cls, attr_name, property(getter, setter))
+        elif callable(attr_value) or isinstance(attr_value, (staticmethod, classmethod)):
+            if isinstance(attr_value, staticmethod):
+                setattr(cls, attr_name, staticmethod(log_execution(attr_value.__func__)))
+            elif isinstance(attr_value, classmethod):
+                setattr(cls, attr_name, classmethod(log_execution(attr_value.__func__)))
+            else:
+                setattr(cls, attr_name, log_execution(attr_value))
+    return cls
+
+
+
diff --git a/python/s3_organiser.py b/python/s3_organiser.py
index aecd172..fd06b8e 100644
--- a/python/s3_organiser.py
+++ b/python/s3_organiser.py
@@ -8,6 +8,7 @@
 from botocore.exceptions import ClientError
 
 from python import utils
+from python import custom_logging
 
 dotenv.load_dotenv()
 
@@ -25,6 +26,7 @@
 )
 
 
+@custom_logging.log_all_methods
 class BucketSort:
     """
     Class for reading and writing to S3
diff --git a/python/word_generator.py b/python/word_generator.py
index 24826ae..62d5307 100644
--- a/python/word_generator.py
+++ b/python/word_generator.py
@@ -26,12 +26,14 @@
 from python.language_verification import LanguageVerification
 from python.s3_organiser import BucketSort
 from python import utils
+from python import custom_logging
 
 import base_config
 
 Image.ANTIALIAS = Image.Resampling.LANCZOS  # type: ignore[attr-defined]
 
 
+@custom_logging.log_all_methods
 class Audio:
     def __init__(self,
                  word_list_path: str,
@@ -57,8 +59,8 @@ def __init__(self,
         self.translated_sentence = self.google_translate(
             source_language=self.language_to_learn, target_language=self.native_language
         )
+        self.audio_duration = None
         self.audio_path = self.text_to_speech(language=self.language_to_learn)
-        self.audio_duration = self.get_audio_duration()
         self.sub_filepath = self.echogarden_generate_subtitles(sentence=self.sentence)
 
     @property
@@ -79,8 +81,10 @@ def text_to_speech(self, language: str, filepath: Optional[str] = None) -> str:
         """
         :param filepath: Optional, the filepath to save the resulting .mp3 file to
         """
         dt = datetime.utcnow().strftime("%m-%d-%Y %H:%M:%S")
-        if filepath is None:
+        if filepath is None and self.cloud_storage is False:
             filepath = f"{base_config.BASE_DIR}/{Paths.AUDIO_DIR_PATH}/{dt}.wav"
+        elif filepath is None and self.cloud_storage is True:
+            filepath = f"/tmp/{dt}.wav"
         tts = gTTS(self.sentence, lang=language)
         if self.cloud_storage:
@@ -88,12 +92,10 @@ def text_to_speech(self, language: str, filepath: Optional[str] = None) -> str:
             tts.write_to_fp(audio_buffer)
             audio_buffer.seek(0)
 
-            s3_key = f"{Paths.AUDIO_DIR_PATH}/{dt}"
+            s3_key = f"{Paths.AUDIO_DIR_PATH}/{dt}.wav"
             s3_bucket = BucketSort(bucket=BUCKET_NAME)
             s3_path = s3_bucket.push_object_to_s3(audio_buffer.read(), s3_key)
-            return s3_path
-
         tts.save(filepath)
         return filepath
 
 
@@ -122,6 +124,8 @@ def generate_srt_file(self, total_syllable_count: int) -> str:
         Writes the sentence to a .srt subtitle file
         :param total_syllable_count: The total number of syllables in the audio
         """
+        if self.audio_duration is None:
+            self.audio_duration = self.get_audio_duration()
         syllables_per_second = self.audio_duration / total_syllable_count
         subtitle_length = 3
         words = self.sentence.split(" ")
@@ -175,15 +179,25 @@ def echogarden_generate_subtitles(self, sentence: str) -> str:
         :return: The output_file_path that the .srt file was written to if successfully generated, else None
         """
         dt = datetime.utcnow().strftime("%m-%d-%Y %H:%M:%S")
-        output_file_path = f"{base_config.BASE_DIR}/{Paths.SUBTITLE_DIR_PATH}/{dt}.srt"
+        if self.cloud_storage is False:
+            output_file_path = f"{base_config.BASE_DIR}/{Paths.SUBTITLE_DIR_PATH}/{dt}.srt"
+        else:
+            output_file_path = f"/tmp/{dt}.srt"
         file_to_execute = f"{base_config.BASE_DIR}/{Paths.NODE_SUBS_FILE_PATH}"
+        no_path = []
+        for log_path in [file_to_execute, self.audio_path]:
+            if not os.path.exists(log_path):
+                no_path.append(log_path)
+        if len(no_path) > 0:
+            raise FileNotFoundError(f"paths {no_path} do not exist")
+
         command = ["node", file_to_execute, self.audio_path, sentence, output_file_path]
         try:
             result = subprocess.run(command, check=True, capture_output=True, text=True)
         except subprocess.CalledProcessError as e:
             raise subprocess.CalledProcessError(
                 e.returncode, e.cmd, stderr=f"Command failed with exit code {e.returncode}. stderr {e.stderr}"
-            )
+            ) from e
 
         if self.cloud_storage is True:
             s3_bucket = BucketSort(bucket=BUCKET_NAME)
@@ -337,6 +351,7 @@ def google_translate(
         return translated_sentence
 
 
+@custom_logging.log_all_methods
 class ImageGenerator:
     """
     Can be used to generate and store images
@@ -438,6 +453,7 @@ def _check_valid_image_path(self):
         raise NotImplementedError
 
 
+@custom_logging.log_all_methods
 class VideoGenerator:
     """Class for generating videos"""
 
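A minimal usage sketch of the decorator added in python/custom_logging.py. The Demo class and do_work method are hypothetical and exist only to illustrate how log_all_methods wraps ordinary methods; logging.basicConfig is included so the decorator's INFO records are actually emitted when run outside Lambda:

    import logging

    from python import custom_logging

    # Attach a root handler so INFO records from custom_logging's module logger are printed
    logging.basicConfig(level=logging.INFO)


    @custom_logging.log_all_methods
    class Demo:
        """Hypothetical class used only to illustrate the decorator."""

        def do_work(self, value: int) -> int:
            return value * 2


    Demo().do_work(21)  # logs "Entering: Demo.do_work" and "Exiting: Demo.do_work" around the call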