From 213a949641af96d3970a68037523b8079c8f1af0 Mon Sep 17 00:00:00 2001
From: ViseshXX
Date: Fri, 24 May 2024 18:04:36 +0530
Subject: [PATCH 1/2] Error Handling

---
 routes.py | 177 ++++++++++++++++++++++++++++++++++++++----------------
 utils.py  |  66 ++++++++++++++------
 2 files changed, 171 insertions(+), 72 deletions(-)

diff --git a/routes.py b/routes.py
index 1c07bc5..d26c3df 100644
--- a/routes.py
+++ b/routes.py
@@ -1,72 +1,143 @@
 import base64
 import io
+import logging
 from fastapi import APIRouter, HTTPException, Depends
 from pydantic import BaseModel
-from utils import denoise_with_rnnoise, get_error_arrays, get_pause_count, split_into_phonemes, processLP
-from schemas import TextData,audioData,PhonemesRequest, PhonemesResponse, ErrorArraysResponse
+from utils import denoise_with_rnnoise, get_error_arrays, get_pause_count, split_into_phonemes, processLP
+from schemas import TextData, audioData, PhonemesRequest, PhonemesResponse, ErrorArraysResponse
 from typing import List
 import jiwer
 import eng_to_ipa as p
 
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
 router = APIRouter()
 
 @router.post('/getTextMatrices')
 async def compute_errors(data: TextData):
-    reference = data.reference
-    hypothesis = data.hypothesis
-    language = data.language
-
-    charOut = jiwer.process_characters(reference, hypothesis)
-    wer = jiwer.wer(reference, hypothesis)
-
-    confidence_char_list =[]
-    missing_char_list =[]
-    construct_text=""
-
-    if language == "en":
-        confidence_char_list, missing_char_list,construct_text = processLP(reference,hypothesis)
-
-    # Extract error arrays
-    error_arrays = get_error_arrays(
-        charOut.alignments, reference, hypothesis)
-
-    return {
-        "wer": wer,
-        "cer": charOut.cer,
-        "insertion": error_arrays['insertion'],
-        "insertion_count": len(error_arrays['insertion']),
-        "deletion": error_arrays['deletion'],
-        "deletion_count": len(error_arrays['deletion']),
-        "substitution": error_arrays['substitution'],
-        "substitution_count": len(error_arrays['substitution']),
-        "confidence_char_list":confidence_char_list,
-        "missing_char_list":missing_char_list,
-        "construct_text":construct_text
-    }
+    try:
+        # Validate input data
+        if not data.reference or not data.hypothesis:
+            raise HTTPException(status_code=400, detail="Reference and hypothesis texts must be provided.")
+
+        reference = data.reference
+        hypothesis = data.hypothesis
+        language = data.language
+
+        # Process character-level differences
+        try:
+            charOut = jiwer.process_characters(reference, hypothesis)
+        except Exception as e:
+            logger.error(f"Error processing characters: {str(e)}")
+            raise HTTPException(status_code=500, detail=f"Error processing characters: {str(e)}")
+
+        # Compute WER
+        try:
+            wer = jiwer.wer(reference, hypothesis)
+        except Exception as e:
+            logger.error(f"Error computing WER: {str(e)}")
+            raise HTTPException(status_code=500, detail=f"Error computing WER: {str(e)}")
+
+        confidence_char_list = []
+        missing_char_list = []
+        construct_text = ""
+
+        if language == "en":
+            try:
+                confidence_char_list, missing_char_list, construct_text = processLP(reference, hypothesis)
+            except Exception as e:
+                logger.error(f"Error processing LP: {str(e)}")
+                raise HTTPException(status_code=500, detail=f"Error processing LP: {str(e)}")
+
+        # Extract error arrays
+        try:
+            error_arrays = get_error_arrays(charOut.alignments, reference, hypothesis)
+        except Exception as e:
+            logger.error(f"Error extracting error arrays: {str(e)}")
+            raise HTTPException(status_code=500, detail=f"Error extracting error arrays: {str(e)}")
+
+        return {
+            "wer": wer,
+            "cer": charOut.cer,
+            "insertion": error_arrays['insertion'],
+            "insertion_count": len(error_arrays['insertion']),
+            "deletion": error_arrays['deletion'],
+            "deletion_count": len(error_arrays['deletion']),
+            "substitution": error_arrays['substitution'],
+            "substitution_count": len(error_arrays['substitution']),
+            "confidence_char_list": confidence_char_list,
+            "missing_char_list": missing_char_list,
+            "construct_text": construct_text
+        }
+    except HTTPException as e:
+        raise e
+    except Exception as e:
+        logger.error(f"Unexpected error: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Unexpected error: {str(e)}")
 
 @router.post("/getPhonemes", response_model=dict)
 async def get_phonemes(data: PhonemesRequest):
-    phonemesList = split_into_phonemes(p.convert(data.text))
-    return {"phonemes": phonemesList}
-
+    try:
+        phonemesList = split_into_phonemes(p.convert(data.text))
+        return {"phonemes": phonemesList}
+    except Exception as e:
+        logger.error(f"Error getting phonemes: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Error getting phonemes: {str(e)}")
 
 @router.post('/audio_processing')
 async def audio_processing(data: audioData):
-    audio_data = data.base64_string
-    audio_bytes = base64.b64decode(audio_data)
-    audio_io = io.BytesIO(audio_bytes)
-
-    pause_count = 0
-    denoised_audio_base64 = ""
-
-    if data.enablePauseCount:
-        pause_count = get_pause_count(audio_io)
-    if data.enableDenoiser:
-        denoised_audio_base64 = denoise_with_rnnoise(audio_data, data.contentType)
-        if denoised_audio_base64 is None:
-            raise HTTPException(status_code=500, detail="Error during audio denoising")
-    return {
-        "denoised_audio_base64": denoised_audio_base64,
-        "pause_count": pause_count
-    }
\ No newline at end of file
+    try:
+        # Validate input data
+        if not data.base64_string:
+            raise HTTPException(status_code=400, detail="Base64 string of audio must be provided.")
+        if not data.contentType:
+            raise HTTPException(status_code=400, detail="Content type must be specified.")
+
+        try:
+            audio_data = data.base64_string
+            audio_bytes = base64.b64decode(audio_data)
+            audio_io = io.BytesIO(audio_bytes)
+        except Exception as e:
+            logger.error(f"Invalid base64 string: {str(e)}")
+            raise HTTPException(status_code=400, detail=f"Invalid base64 string: {str(e)}")
+
+        pause_count = 0
+        denoised_audio_base64 = ""
+
+        if data.enablePauseCount:
+            try:
+                pause_count = get_pause_count(audio_io)
+                if pause_count is None:
+                    logger.error("Error during pause count detection")
+                    raise HTTPException(status_code=500, detail="Error during pause count detection")
+            except Exception as e:
+                logger.error(f"Error during pause count detection: {str(e)}")
+                raise HTTPException(status_code=500, detail=f"Error during pause count detection: {str(e)}")
+
+        if data.enableDenoiser:
+            try:
+                denoised_audio_base64 = denoise_with_rnnoise(audio_data, data.contentType)
+                if denoised_audio_base64 is None:
+                    logger.error("Error during audio denoising")
+                    raise HTTPException(status_code=500, detail="Error during audio denoising")
+            except ValueError as e:
+                logger.error(f"Value error in denoise_with_rnnoise: {str(e)}")
+                raise HTTPException(status_code=400, detail=f"Value error in denoise_with_rnnoise: {str(e)}")
+            except RuntimeError as e:
+                logger.error(f"Runtime error in denoise_with_rnnoise: {str(e)}")
+                raise HTTPException(status_code=500, detail=f"Runtime error in denoise_with_rnnoise: {str(e)}")
+            except Exception as e:
+                logger.error(f"Unexpected error in denoise_with_rnnoise: {str(e)}")
+                raise HTTPException(status_code=500, detail=f"Unexpected error in denoise_with_rnnoise: {str(e)}")
+
+        return {
+            "denoised_audio_base64": denoised_audio_base64,
+            "pause_count": pause_count
+        }
+    except HTTPException as e:
+        raise e
+    except Exception as e:
+        logger.error(f"Unexpected error: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Unexpected error: {str(e)}")
diff --git a/utils.py b/utils.py
index 40ec951..a829058 100644
--- a/utils.py
+++ b/utils.py
@@ -19,7 +19,11 @@ def denoise_with_rnnoise(audio_base64, content_type, padding_duration=0.1, time_stretch_factor=0.75):
     try:
         # Decode base64 to get the audio data
-        audio_data = base64.b64decode(audio_base64)
+        try:
+            audio_data = base64.b64decode(audio_base64)
+        except base64.binascii.Error as e:
+            raise ValueError(f"Invalid base64 string: {str(e)}")
+
         audio_io = io.BytesIO(audio_data)
         input_audio = audio_io.read()
 
@@ -28,40 +32,64 @@ def denoise_with_rnnoise(audio_base64, content_type, padding_duration=0.1, time_
         # Create the ffmpeg filter chain
         filter_chain = []
 
-        if content_type == 'Word' or content_type == 'word':
+        if content_type.lower() == 'word':
             filter_chain.append(f'apad=pad_dur={padding_duration}')
             filter_chain.append(f'apad=pad_dur={padding_duration}')
             filter_chain.append(f'atempo={time_stretch_factor}')
 
         filter_chain_str = ','.join(filter_chain)
 
         # Apply the filters and denoise
-        output, _ = (
-            ffmpeg
-            .input('pipe:')
-            .output('pipe:', format='wav', af=f'{filter_chain_str},arnndn=m={model_path}')
-            .run(input=input_audio, capture_stdout=True, capture_stderr=True)
-        )
+        try:
+            output, _ = (
+                ffmpeg
+                .input('pipe:', format='wav')
+                .output('pipe:', format='wav', af=f'{filter_chain_str},arnndn=m={model_path}')
+                .run(input=input_audio, capture_stdout=True, capture_stderr=True)
+            )
+        except ffmpeg.Error as e:
+            raise RuntimeError(f"Error during noise reduction with FFmpeg: {e.stderr.decode()}")
 
         # Convert the processed output back to base64
-        denoised_audio_base64 = base64.b64encode(output).decode('utf-8')
+        try:
+            denoised_audio_base64 = base64.b64encode(output).decode('utf-8')
+        except Exception as e:
+            raise RuntimeError(f"Error encoding output to base64: {str(e)}")
 
         # Clear cache to free memory
         del audio_data
         del audio_io
 
         return denoised_audio_base64
-
-    except ffmpeg.Error as e:
-        print(f"Error during noise reduction: {e.stderr.decode()}")
-        return None
-
+
+    except ValueError as e:
+        print(f"Value error in denoise_with_rnnoise: {str(e)}")
+        raise
+    except RuntimeError as e:
+        print(f"Runtime error in denoise_with_rnnoise: {str(e)}")
+        raise
+    except Exception as e:
+        print(f"Unexpected error in denoise_with_rnnoise: {str(e)}")
+        raise
+
 def convert_to_base64(audio_data, sample_rate):
-    buffer = io.BytesIO()
-    sf.write(buffer, audio_data, sample_rate, format='wav')
-    buffer.seek(0)
-    base64_audio = base64.b64encode(buffer.read()).decode('utf-8')
-    return base64_audio
+    try:
+        buffer = io.BytesIO()
+        try:
+            sf.write(buffer, audio_data, sample_rate, format='wav')
+        except Exception as e:
+            raise RuntimeError(f"Error writing audio data to buffer: {str(e)}")
+
+        buffer.seek(0)
+        try:
+            base64_audio = base64.b64encode(buffer.read()).decode('utf-8')
+        except Exception as e:
+            raise RuntimeError(f"Error encoding buffer to base64: {str(e)}")
+        return base64_audio
+    except Exception as e:
+        print(f"Error in convert_to_base64: {str(e)}")
+        return {"error": str(e)}
+
 def get_error_arrays(alignments, reference, hypothesis):
     insertion = []
     deletion = []

From e63253a4e825d0ed5e79e06b19bd7a32b0eca62b Mon Sep 17 00:00:00 2001
From: ViseshXX
Date: Fri, 31 May 2024 17:52:40 +0530
Subject: [PATCH 2/2] Language check

---
 routes.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/routes.py b/routes.py
index d26c3df..4384a65 100644
--- a/routes.py
+++ b/routes.py
@@ -26,6 +26,11 @@ async def compute_errors(data: TextData):
         hypothesis = data.hypothesis
         language = data.language
 
+        # Validate language
+        allowed_languages = {"en", "ta", "te", "kn", "hi"}
+        if language not in allowed_languages:
+            raise HTTPException(status_code=400, detail=f"Unsupported language: {language}. Supported languages are: {', '.join(allowed_languages)}")
+
         # Process character-level differences
         try:
             charOut = jiwer.process_characters(reference, hypothesis)