pavelzbornik
diff --git a/‎.devcontainer/devcontainer.json
Lines changed: 35 additions & 35 deletions b/‎.devcontainer/devcontainer.json
Lines changed: 35 additions & 35 deletions
diff --git a/‎.gitignore
Lines changed: 2 additions & 1 deletion b/‎.gitignore
Lines changed: 2 additions & 1 deletion
diff --git a/‎.vscode/settings.json
Lines changed: 2 additions & 2 deletions b/‎.vscode/settings.json
Lines changed: 2 additions & 2 deletions
diff --git a/‎app/docs/openapi.json
Lines changed: 3 additions & 3 deletions b/‎app/docs/openapi.json
Lines changed: 3 additions & 3 deletions
diff --git a/‎app/docs/openapi.yaml
Lines changed: 5 additions & 5 deletions b/‎app/docs/openapi.yaml
Lines changed: 5 additions & 5 deletions
diff --git a/‎app/tests/.test.env
Lines changed: 0 additions & 2 deletions b/‎app/tests/.test.env
Lines changed: 0 additions & 2 deletions
diff --git a/‎app/whisperx_services.py
Lines changed: 1 addition & 1 deletion b/‎app/whisperx_services.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎dockerfile
Lines changed: 4 additions & 2 deletions b/‎dockerfile
Lines changed: 4 additions & 2 deletions
diff --git a/‎requirements.txt
Lines changed: 3 additions & 12 deletions b/‎requirements.txt
Lines changed: 3 additions & 12 deletions
diff --git a/‎requirements/dev.txt
Lines changed: 18 additions & 0 deletions b/‎requirements/dev.txt
Lines changed: 18 additions & 0 deletions
diff --git a/‎requirements/prod.txt
Lines changed: 10 additions & 0 deletions b/‎requirements/prod.txt
Lines changed: 10 additions & 0 deletions
diff --git a/‎app/tests/__init__.py renamed to ‎tests/__init__.py b/‎app/tests/__init__.py renamed to ‎tests/__init__.py
diff --git a/‎app/tests/pytest.ini renamed to ‎tests/pytest.ini b/‎app/tests/pytest.ini renamed to ‎tests/pytest.ini
diff --git a/‎app/tests/test_all.py renamed to ‎tests/test_all.py
Lines changed: 9 additions & 5 deletions b/‎app/tests/test_all.py renamed to ‎tests/test_all.py
Lines changed: 9 additions & 5 deletions
diff --git a/‎app/tests/test_files/SampleVideo_1280x720_1mb.flv renamed to ‎tests/test_files/SampleVideo_1280x720_1mb.flv b/‎app/tests/test_files/SampleVideo_1280x720_1mb.flv renamed to ‎tests/test_files/SampleVideo_1280x720_1mb.flv
diff --git a/‎app/tests/test_files/aligned_transcript.json renamed to ‎tests/test_files/aligned_transcript.json b/‎app/tests/test_files/aligned_transcript.json renamed to ‎tests/test_files/aligned_transcript.json
diff --git a/‎app/tests/test_files/audio_cs.m4a renamed to ‎tests/test_files/audio_cs.m4a b/‎app/tests/test_files/audio_cs.m4a renamed to ‎tests/test_files/audio_cs.m4a
diff --git a/‎app/tests/test_files/audio_en.mp3 renamed to ‎tests/test_files/audio_en.mp3 b/‎app/tests/test_files/audio_en.mp3 renamed to ‎tests/test_files/audio_en.mp3
diff --git a/‎app/tests/test_files/audio_en.srt renamed to ‎tests/test_files/audio_en.srt b/‎app/tests/test_files/audio_en.srt renamed to ‎tests/test_files/audio_en.srt
diff --git a/‎app/tests/test_files/diarazition.json renamed to ‎tests/test_files/diarazition.json b/‎app/tests/test_files/diarazition.json renamed to ‎tests/test_files/diarazition.json
diff --git a/‎app/tests/test_files/pexels-c-technical-6143537 (2160p).mp4 renamed to ‎tests/test_files/pexels-c-technical-6143537 (2160p).mp4 b/‎app/tests/test_files/pexels-c-technical-6143537 (2160p).mp4 renamed to ‎tests/test_files/pexels-c-technical-6143537 (2160p).mp4
diff --git a/‎app/tests/test_files/transcript-wrong.json renamed to ‎tests/test_files/transcript-wrong.json b/‎app/tests/test_files/transcript-wrong.json renamed to ‎tests/test_files/transcript-wrong.json
diff --git a/‎app/tests/test_files/transcript.json renamed to ‎tests/test_files/transcript.json b/‎app/tests/test_files/transcript.json renamed to ‎tests/test_files/transcript.json
@@ -1,37 +1,37 @@
 
 {
-    "name": "WhisperX-FastAPI",
-    "context": "..",
-    "dockerFile": "../dockerfile",
-    "runArgs": [
-        "--gpus",
-        "all"
-    ],
-    "customizations": {
-        "vscode": {
-        "extensions": [
-            "ms-python.python",
-            "ms-python.vscode-pylance",
-            "dbaeumer.vscode-eslint",
-            "esbenp.prettier-vscode",
-            "github.copilot",
-            "ms-python.black-formatter",
-            "ms-python.flake8",
-            "github.vscode-github-actions",
-            "george-alisson.html-preview-vscode",
-            "ms-python.isort",
-            "prettier.prettier-vscode",
-            "ms-azuretools.vscode-docker"
-        ],
-        "settings": {
-            "editor.defaultFormatter": "esbenp.prettier-vscode",
-            "[json]": {
-            "editor.defaultFormatter": "esbenp.prettier-vscode"
-            },
-            "editor.formatOnSave": true
-        }
-        }
-    },
-    "postCreateCommand": "pip install -r requirements.txt",
-    "remoteUser": "root"
-}
+  "name": "WhisperX-FastAPI",
+  "context": "..",
+  "dockerFile": "../dockerfile",
+  "runArgs": ["--gpus", "all"],
+
+  "forwardPorts": [8000],
+
+  "customizations": {
+    "vscode": {
+      "extensions": [
+        "ms-python.python",
+        "ms-python.vscode-pylance",
+        "dbaeumer.vscode-eslint",
+        "esbenp.prettier-vscode",
+        "github.copilot",
+        "ms-python.black-formatter",
+        "ms-python.flake8",
+        "github.vscode-github-actions",
+        "george-alisson.html-preview-vscode",
+        "ms-python.isort",
+        "prettier.prettier-vscode",
+        "ms-azuretools.vscode-docker"
+      ],
+      "settings": {
+        "editor.defaultFormatter": "esbenp.prettier-vscode",
+        "[json]": {
+          "editor.defaultFormatter": "esbenp.prettier-vscode"
+        },
+        "editor.formatOnSave": true
+      }
+    }
+  },
+  "postCreateCommand": "pip install -r requirements/dev.txt",
+  "remoteUser": "root"
+}
@@ -1,4 +1,5 @@
 .env
 __pycache__
 *.ipynb
-*.db
+*.db
+*.env
@@ -1,7 +1,7 @@
 {
     "python.testing.pytestArgs": [
-        "app"
+        "tests"
     ],
     "python.testing.unittestEnabled": false,
     "python.testing.pytestEnabled": true
-}
+}
@@ -2,7 +2,7 @@
   "openapi": "3.1.0",
   "info": {
     "title": "whisperX REST service",
-    "description": "\n# whisperX REST Service\n\nWelcome to the whisperX RESTful API! This API provides a suite of audio processing services to enhance and analyze your audio content.\n\n## Documentation:\n\nFor detailed information on request and response formats, consult the [WhisperX Documentation](https://github.com/m-bain/whisperX).\n\n## Services:\n\nSpeech-2-Text provides a suite of audio processing services to enhance and analyze your audio content. The following services are available:\n\n1. Transcribe: Transcribe an audio/video  file into text.\n2. Align: Align the transcript to the audio/video file.\n3. Diarize: Diarize an audio/video file into speakers.\n4. Combine Transcript and Diarization: Combine the transcript and diarization results.\n\n## Supported file extensions:\nAUDIO_EXTENSIONS = {'.amr', '.m4a', '.awb', '.aac', '.oga', '.wav', '.mp3', '.ogg', '.wma'}\n\nVIDEO_EXTENSIONS = {'.wmv', '.mkv', '.mp4', '.avi', '.mov'}\n\n",
+    "description": "\n# whisperX REST Service\n\nWelcome to the whisperX RESTful API! This API provides a suite of audio processing services to enhance and analyze your audio content.\n\n## Documentation:\n\nFor detailed information on request and response formats, consult the [WhisperX Documentation](https://github.com/m-bain/whisperX).\n\n## Services:\n\nSpeech-2-Text provides a suite of audio processing services to enhance and analyze your audio content. The following services are available:\n\n1. Transcribe: Transcribe an audio/video  file into text.\n2. Align: Align the transcript to the audio/video file.\n3. Diarize: Diarize an audio/video file into speakers.\n4. Combine Transcript and Diarization: Combine the transcript and diarization results.\n\n## Supported file extensions:\nAUDIO_EXTENSIONS = {'.m4a', '.awb', '.oga', '.ogg', '.amr', '.mp3', '.aac', '.wma', '.wav'}\n\nVIDEO_EXTENSIONS = {'.mp4', '.mkv', '.mov', '.avi', '.wmv'}\n\n",
     "version": "0.0.1"
   },
   "paths": {
@@ -1477,7 +1477,7 @@
             "schema": {
               "$ref": "#/components/schemas/Device",
               "description": "Device to use for PyTorch inference",
-              "default": "cuda"
+              "default": "cpu"
             },
             "description": "Device to use for PyTorch inference"
           },
@@ -1570,7 +1570,7 @@
             "schema": {
               "$ref": "#/components/schemas/Device",
               "description": "Device to use for PyTorch inference",
-              "default": "cuda"
+              "default": "cpu"
             },
             "description": "Device to use for PyTorch inference"
           },
 
@@ -36,11 +36,11 @@ info:
 
     ## Supported file extensions:
 
-    AUDIO_EXTENSIONS = {''.amr'', ''.m4a'', ''.awb'', ''.aac'', ''.oga'', ''.wav'',
-    ''.mp3'', ''.ogg'', ''.wma''}
+    AUDIO_EXTENSIONS = {''.m4a'', ''.awb'', ''.oga'', ''.ogg'', ''.amr'', ''.mp3'',
+    ''.aac'', ''.wma'', ''.wav''}
 
 
-    VIDEO_EXTENSIONS = {''.wmv'', ''.mkv'', ''.mp4'', ''.avi'', ''.mov''}
+    VIDEO_EXTENSIONS = {''.mp4'', ''.mkv'', ''.mov'', ''.avi'', ''.wmv''}
 
 
     '
@@ -1209,7 +1209,7 @@ paths:
         schema:
           $ref: '#/components/schemas/Device'
           description: Device to use for PyTorch inference
-          default: cuda
+          default: cpu
         description: Device to use for PyTorch inference
       - name: align_model
         in: query
@@ -1271,7 +1271,7 @@ paths:
         schema:
           $ref: '#/components/schemas/Device'
           description: Device to use for PyTorch inference
-          default: cuda
+          default: cpu
         description: Device to use for PyTorch inference
       - name: min_speakers
         in: query
 
@@ -78,7 +78,7 @@ def transcribe_with_whisper(
     return result
 
 
-def diarize(audio, device, min_speakers=None, max_speakers=None):
+def diarize(audio, device: str = device, min_speakers=None, max_speakers=None):
     """
     Diarize an audio file using the PyAnnotate model.
 
 
@@ -29,9 +29,11 @@ RUN pip install -U pip setuptools --no-cache-dir
 RUN pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 -i https://download.pytorch.org/whl/cu124 --no-cache-dir
 RUN pip install git+https://github.com/m-bain/whisperx.git --no-cache-dir
 
-COPY . .
+COPY requirements requirements
+RUN pip install --no-cache -r requirements/prod.txt
 
-RUN pip install -r requirements.txt --no-cache-dir
+COPY app app
+COPY tests tests
 
 EXPOSE 8000
 ENTRYPOINT ["gunicorn", "--bind", "0.0.0.0:8000", "--workers", "1", "--timeout", "0", "app.main:app", "-k", "uvicorn.workers.UvicornWorker"]
@@ -1,12 +1,3 @@
-# unidecode==1.3.8
-uvicorn==0.32.1
-gunicorn==23.0.0
-tqdm==4.66.1
-python-multipart==0.0.17
-fastapi==0.115.5
-numba==0.60.0
-# openai-whisper
-python-dotenv==1.0.1
-pytest==8.3.3
-httpx==0.27.2
-ctranslate2==4.4.0
+# Included because many PaaS providers require a requirements.txt file in the project root
+# Just installs the production requirements.
+-r requirements/prod.txt
@@ -0,0 +1,18 @@
+# Everything the developer needs in addition to the production requirements
+-r prod.txt
+
+# Testing
+pytest==8.3.3
+
+# Lint and code style
+black==24.10.0
+flake8-blind-except==0.2.1
+flake8-debugger==4.1.2
+flake8-docstrings==1.7.0
+flake8-isort==6.1.1
+flake8==7.1.1
+isort==5.13.2
+pep8-naming==0.14.1
+
+# Pre-commit
+pre-commit==4.0.1
@@ -0,0 +1,10 @@
+# unidecode==1.3.8
+uvicorn==0.32.1
+gunicorn==23.0.0
+tqdm==4.66.1
+python-multipart==0.0.17
+fastapi==0.115.5
+numba==0.60.0
+python-dotenv==1.0.1
+httpx==0.27.2
+ctranslate2==4.4.0
@@ -1,4 +1,5 @@
 import json
+import os
 import tempfile
 import time
 
@@ -9,9 +10,8 @@
 
 client = TestClient(main.app)
 
-import os
 
-AUDIO_FILE = "app/tests/test_files/audio_en.mp3"
+AUDIO_FILE = "tests/test_files/audio_en.mp3"
 assert os.path.exists(AUDIO_FILE), f"Audio file not found: {AUDIO_FILE}"
 
 # for tiny models, "a" and "the" can be mixed up
@@ -161,6 +161,7 @@ def combine(aligned_transcript_file, diarazition_file):
     return task_result.json()["result"]
 
 
+@pytest.mark.skipif(os.getenv("DEVICE") == "cpu", reason="Test requires GPU")
 def test_speech_to_text():
     assert generic_transcription("/speech-to-text") is not None
 
@@ -170,13 +171,15 @@ def test_transcribe():
 
 
 def test_align():
-    assert align("app/tests/test_files/transcript.json") is not None
+    assert align("tests/test_files/transcript.json") is not None
 
 
+@pytest.mark.skipif(os.getenv("DEVICE") == "cpu", reason="Test requires GPU")
 def test_diarize():
     assert diarize() is not None
 
 
+@pytest.mark.skipif(os.getenv("DEVICE") == "cpu", reason="Test requires GPU")
 def test_flow():
     # Create temporary files for transcript, aligned transcript, and diarization
     with tempfile.NamedTemporaryFile(
@@ -205,15 +208,16 @@ def test_flow():
 
 def test_combine():
     result = combine(
-        "app/tests/test_files/aligned_transcript.json",
-        "app/tests/test_files/diarazition.json",
+        "tests/test_files/aligned_transcript.json",
+        "tests/test_files/diarazition.json",
     )
 
     assert result["segments"][0]["text"].startswith(TRANSCRIPT_RESULT_1) or result[
         "segments"
     ][0]["text"].startswith(TRANSCRIPT_RESULT_2)
 
 
+@pytest.mark.skipif(os.getenv("DEVICE") == "cpu", reason="Test requires GPU")
 def test_speech_to_text_url():
     # There is sometimes an issue with CUDA memory; better to run this test individually
     response = client.post(
-Original file line number
+Diff line change
@@ @@ -1,4 +1,5 @@ @@
 .env
 __pycache__
 *.ipynb
 -*.db
 +*.db
 +*.env