Merge pull request #74 from richardr1126/master

Try to add AAC audio format w/ updated test
remsky · Jan 18, 2025 · dba8220
2 parents 57f3cf6 + d51d861
commit dba8220
Show file tree

Hide file tree

Showing 6 changed files with 31 additions and 10 deletions.
diff --git a/api/src/services/audio.py b/api/src/services/audio.py
@@ -6,6 +6,7 @@
 import scipy.io.wavfile as wavfile
 import soundfile as sf
 from loguru import logger
+from pydub import AudioSegment
 
 from ..core.config import settings
 
@@ -52,6 +53,9 @@ class AudioService:
         "flac": {
             "compression_level": 0.0,  # Light compression, still fast
         },
+        "aac": {
+            "bitrate": "192k",  # Default AAC bitrate
+        },
     }
 
     @staticmethod
@@ -144,9 +148,22 @@ def convert_audio(
                     subtype="PCM_16",
                     **settings,
                 )
-            elif output_format == "aac":
-                raise ValueError(
-                    "Format aac not currently supported. Supported formats are: wav, mp3, opus, flac, pcm."
+            elif output_format == "aac":           
+                # Wrap the raw sample bytes in a pydub AudioSegment; pydub encodes to AAC via ffmpeg
+                audio_segment = AudioSegment(
+                    normalized_audio.tobytes(), 
+                    frame_rate=sample_rate,
+                    sample_width=normalized_audio.dtype.itemsize,
+                    channels=1 if len(normalized_audio.shape) == 1 else normalized_audio.shape[1]
+                )
+
+                settings = format_settings.get("aac", {}) if format_settings else {}
+                settings = {**AudioService.DEFAULT_SETTINGS["aac"], **settings}
+
+                audio_segment.export(
+                    buffer,
+                    format="adts",  # ADTS is a common AAC container format
+                    bitrate=settings["bitrate"]
                 )
             else:
                 raise ValueError(

diff --git a/api/tests/test_audio_service.py b/api/tests/test_audio_service.py
@@ -58,14 +58,14 @@ def test_convert_to_flac(sample_audio):
     assert len(result) > 0
 
 
-def test_convert_to_aac_raises_error(sample_audio):
-    """Test that converting to AAC raises an error"""
+def test_convert_to_aac(sample_audio):
+    """Test converting to AAC format"""
     audio_data, sample_rate = sample_audio
-    with pytest.raises(
-        ValueError,
-        match="Failed to convert audio to aac: Format aac not currently supported. Supported formats are: wav, mp3, opus, flac, pcm.",
-    ):
-        AudioService.convert_audio(audio_data, sample_rate, "aac")
+    result = AudioService.convert_audio(audio_data, sample_rate, "aac")
+    assert isinstance(result, bytes)
+    assert len(result) > 0
+    # ADTS frames start with the 0xFFF syncword: 0xFFF1 = MPEG-4, 0xFFF9 = MPEG-2 (both without CRC)
+    assert result.startswith(b'\xff\xf1') or result.startswith(b'\xff\xf9')
 
 
 def test_convert_to_pcm(sample_audio):

diff --git a/docker/cpu/Dockerfile b/docker/cpu/Dockerfile
@@ -6,6 +6,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     git \
     libsndfile1 \
     curl \
+    ffmpeg \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 

diff --git a/docker/gpu/Dockerfile b/docker/gpu/Dockerfile
@@ -8,6 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     git \
     libsndfile1 \
     curl \
+    ffmpeg \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 

diff --git a/docker/shared/pyproject.toml b/docker/shared/pyproject.toml
@@ -33,6 +33,7 @@ dependencies = [
     "munch==4.0.0",
     "tiktoken==0.8.0",
     "loguru==0.7.3",
+    "pydub>=0.25.1",
 ]
 
 [project.optional-dependencies]

diff --git a/pyproject.toml b/pyproject.toml
@@ -33,6 +33,7 @@ dependencies = [
     "openai>=1.59.6",
     "ebooklib>=0.18",
     "html2text>=2024.2.26",
+    "pydub>=0.25.1",
 ]
 
 [project.optional-dependencies]