Skip to content

Commit 600aeaa

Browse files
committed
feat: add test configuration and files, update requirements, and improve Docker setup
1 parent 627af46 commit 600aeaa

23 files changed

+92
-68
lines changed

.devcontainer/devcontainer.json

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,37 @@
11

22
{
3-
"name": "WhisperX-FastAPI",
4-
"context": "..",
5-
"dockerFile": "../dockerfile",
6-
"runArgs": [
7-
"--gpus",
8-
"all"
9-
],
10-
"customizations": {
11-
"vscode": {
12-
"extensions": [
13-
"ms-python.python",
14-
"ms-python.vscode-pylance",
15-
"dbaeumer.vscode-eslint",
16-
"esbenp.prettier-vscode",
17-
"github.copilot",
18-
"ms-python.black-formatter",
19-
"ms-python.flake8",
20-
"github.vscode-github-actions",
21-
"george-alisson.html-preview-vscode",
22-
"ms-python.isort",
23-
"prettier.prettier-vscode",
24-
"ms-azuretools.vscode-docker"
25-
],
26-
"settings": {
27-
"editor.defaultFormatter": "esbenp.prettier-vscode",
28-
"[json]": {
29-
"editor.defaultFormatter": "esbenp.prettier-vscode"
30-
},
31-
"editor.formatOnSave": true
32-
}
33-
}
34-
},
35-
"postCreateCommand": "pip install -r requirements.txt",
36-
"remoteUser": "root"
37-
}
3+
"name": "WhisperX-FastAPI",
4+
"context": "..",
5+
"dockerFile": "../dockerfile",
6+
"runArgs": ["--gpus", "all"],
7+
8+
"forwardPorts": [8000],
9+
10+
"customizations": {
11+
"vscode": {
12+
"extensions": [
13+
"ms-python.python",
14+
"ms-python.vscode-pylance",
15+
"dbaeumer.vscode-eslint",
16+
"esbenp.prettier-vscode",
17+
"github.copilot",
18+
"ms-python.black-formatter",
19+
"ms-python.flake8",
20+
"github.vscode-github-actions",
21+
"george-alisson.html-preview-vscode",
22+
"ms-python.isort",
23+
"prettier.prettier-vscode",
24+
"ms-azuretools.vscode-docker"
25+
],
26+
"settings": {
27+
"editor.defaultFormatter": "esbenp.prettier-vscode",
28+
"[json]": {
29+
"editor.defaultFormatter": "esbenp.prettier-vscode"
30+
},
31+
"editor.formatOnSave": true
32+
}
33+
}
34+
},
35+
"postCreateCommand": "pip install -r requirements/dev.txt",
36+
"remoteUser": "root"
37+
}

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
.env
22
__pycache__
33
*.ipynb
4-
*.db
4+
*.db
5+
*.env

.vscode/settings.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"python.testing.pytestArgs": [
3-
"app"
3+
"tests"
44
],
55
"python.testing.unittestEnabled": false,
66
"python.testing.pytestEnabled": true
7-
}
7+
}

app/docs/openapi.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"openapi": "3.1.0",
33
"info": {
44
"title": "whisperX REST service",
5-
"description": "\n# whisperX REST Service\n\nWelcome to the whisperX RESTful API! This API provides a suite of audio processing services to enhance and analyze your audio content.\n\n## Documentation:\n\nFor detailed information on request and response formats, consult the [WhisperX Documentation](https://github.com/m-bain/whisperX).\n\n## Services:\n\nSpeech-2-Text provides a suite of audio processing services to enhance and analyze your audio content. The following services are available:\n\n1. Transcribe: Transcribe an audio/video file into text.\n2. Align: Align the transcript to the audio/video file.\n3. Diarize: Diarize an audio/video file into speakers.\n4. Combine Transcript and Diarization: Combine the transcript and diarization results.\n\n## Supported file extensions:\nAUDIO_EXTENSIONS = {'.amr', '.m4a', '.awb', '.aac', '.oga', '.wav', '.mp3', '.ogg', '.wma'}\n\nVIDEO_EXTENSIONS = {'.wmv', '.mkv', '.mp4', '.avi', '.mov'}\n\n",
5+
"description": "\n# whisperX REST Service\n\nWelcome to the whisperX RESTful API! This API provides a suite of audio processing services to enhance and analyze your audio content.\n\n## Documentation:\n\nFor detailed information on request and response formats, consult the [WhisperX Documentation](https://github.com/m-bain/whisperX).\n\n## Services:\n\nSpeech-2-Text provides a suite of audio processing services to enhance and analyze your audio content. The following services are available:\n\n1. Transcribe: Transcribe an audio/video file into text.\n2. Align: Align the transcript to the audio/video file.\n3. Diarize: Diarize an audio/video file into speakers.\n4. Combine Transcript and Diarization: Combine the transcript and diarization results.\n\n## Supported file extensions:\nAUDIO_EXTENSIONS = {'.m4a', '.awb', '.oga', '.ogg', '.amr', '.mp3', '.aac', '.wma', '.wav'}\n\nVIDEO_EXTENSIONS = {'.mp4', '.mkv', '.mov', '.avi', '.wmv'}\n\n",
66
"version": "0.0.1"
77
},
88
"paths": {
@@ -1477,7 +1477,7 @@
14771477
"schema": {
14781478
"$ref": "#/components/schemas/Device",
14791479
"description": "Device to use for PyTorch inference",
1480-
"default": "cuda"
1480+
"default": "cpu"
14811481
},
14821482
"description": "Device to use for PyTorch inference"
14831483
},
@@ -1570,7 +1570,7 @@
15701570
"schema": {
15711571
"$ref": "#/components/schemas/Device",
15721572
"description": "Device to use for PyTorch inference",
1573-
"default": "cuda"
1573+
"default": "cpu"
15741574
},
15751575
"description": "Device to use for PyTorch inference"
15761576
},

app/docs/openapi.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,11 @@ info:
3636
3737
## Supported file extensions:
3838
39-
AUDIO_EXTENSIONS = {''.amr'', ''.m4a'', ''.awb'', ''.aac'', ''.oga'', ''.wav'',
40-
''.mp3'', ''.ogg'', ''.wma''}
39+
AUDIO_EXTENSIONS = {''.m4a'', ''.awb'', ''.oga'', ''.ogg'', ''.amr'', ''.mp3'',
40+
''.aac'', ''.wma'', ''.wav''}
4141
4242
43-
VIDEO_EXTENSIONS = {''.wmv'', ''.mkv'', ''.mp4'', ''.avi'', ''.mov''}
43+
VIDEO_EXTENSIONS = {''.mp4'', ''.mkv'', ''.mov'', ''.avi'', ''.wmv''}
4444
4545
4646
'
@@ -1209,7 +1209,7 @@ paths:
12091209
schema:
12101210
$ref: '#/components/schemas/Device'
12111211
description: Device to use for PyTorch inference
1212-
default: cuda
1212+
default: cpu
12131213
description: Device to use for PyTorch inference
12141214
- name: align_model
12151215
in: query
@@ -1271,7 +1271,7 @@ paths:
12711271
schema:
12721272
$ref: '#/components/schemas/Device'
12731273
description: Device to use for PyTorch inference
1274-
default: cuda
1274+
default: cpu
12751275
description: Device to use for PyTorch inference
12761276
- name: min_speakers
12771277
in: query

app/tests/.test.env

Lines changed: 0 additions & 2 deletions
This file was deleted.

app/whisperx_services.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def transcribe_with_whisper(
7878
return result
7979

8080

81-
def diarize(audio, device, min_speakers=None, max_speakers=None):
81+
def diarize(audio, device: str = device, min_speakers=None, max_speakers=None):
8282
"""
8383
Diarize an audio file using the pyannote model.
8484

dockerfile

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,11 @@ RUN pip install -U pip setuptools --no-cache-dir
2929
RUN pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 -i https://download.pytorch.org/whl/cu124 --no-cache-dir
3030
RUN pip install git+https://github.com/m-bain/whisperx.git --no-cache-dir
3131

32-
COPY . .
32+
COPY requirements requirements
33+
RUN pip install --no-cache -r requirements/prod.txt
3334

34-
RUN pip install -r requirements.txt --no-cache-dir
35+
COPY app app
36+
COPY tests tests
3537

3638
EXPOSE 8000
3739
ENTRYPOINT ["gunicorn", "--bind", "0.0.0.0:8000", "--workers", "1", "--timeout", "0", "app.main:app", "-k", "uvicorn.workers.UvicornWorker"]

requirements.txt

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,3 @@
1-
# unidecode==1.3.8
2-
uvicorn==0.32.1
3-
gunicorn==23.0.0
4-
tqdm==4.66.1
5-
python-multipart==0.0.17
6-
fastapi==0.115.5
7-
numba==0.60.0
8-
# openai-whisper
9-
python-dotenv==1.0.1
10-
pytest==8.3.3
11-
httpx==0.27.2
12-
ctranslate2==4.4.0
1+
# Included because many PaaS providers require a requirements.txt file in the project root.
2+
# Just installs the production requirements.
3+
-r requirements/prod.txt

requirements/dev.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Everything the developer needs in addition to the production requirements
2+
-r prod.txt
3+
4+
# Testing
5+
pytest==8.3.3
6+
7+
# Lint and code style
8+
black==24.10.0
9+
flake8-blind-except==0.2.1
10+
flake8-debugger==4.1.2
11+
flake8-docstrings==1.7.0
12+
flake8-isort==6.1.1
13+
flake8==7.1.1
14+
isort==5.13.2
15+
pep8-naming==0.14.1
16+
17+
# Pre-commit
18+
pre-commit==4.0.1

requirements/prod.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# unidecode==1.3.8
2+
uvicorn==0.32.1
3+
gunicorn==23.0.0
4+
tqdm==4.66.1
5+
python-multipart==0.0.17
6+
fastapi==0.115.5
7+
numba==0.60.0
8+
python-dotenv==1.0.1
9+
httpx==0.27.2
10+
ctranslate2==4.4.0
File renamed without changes.
File renamed without changes.

app/tests/test_all.py renamed to tests/test_all.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
import os
23
import tempfile
34
import time
45

@@ -9,9 +10,8 @@
910

1011
client = TestClient(main.app)
1112

12-
import os
1313

14-
AUDIO_FILE = "app/tests/test_files/audio_en.mp3"
14+
AUDIO_FILE = "tests/test_files/audio_en.mp3"
1515
assert os.path.exists(AUDIO_FILE), f"Audio file not found: {AUDIO_FILE}"
1616

1717
# For tiny models, the words "a" and "the" can be mixed up
@@ -161,6 +161,7 @@ def combine(aligned_transcript_file, diarazition_file):
161161
return task_result.json()["result"]
162162

163163

164+
@pytest.mark.skipif(os.getenv("DEVICE") == "cpu", reason="Test requires GPU")
164165
def test_speech_to_text():
165166
assert generic_transcription("/speech-to-text") is not None
166167

@@ -170,13 +171,15 @@ def test_transcribe():
170171

171172

172173
def test_align():
173-
assert align("app/tests/test_files/transcript.json") is not None
174+
assert align("tests/test_files/transcript.json") is not None
174175

175176

177+
@pytest.mark.skipif(os.getenv("DEVICE") == "cpu", reason="Test requires GPU")
176178
def test_diarize():
177179
assert diarize() is not None
178180

179181

182+
@pytest.mark.skipif(os.getenv("DEVICE") == "cpu", reason="Test requires GPU")
180183
def test_flow():
181184
# Create temporary files for transcript, aligned transcript, and diarization
182185
with tempfile.NamedTemporaryFile(
@@ -205,15 +208,16 @@ def test_flow():
205208

206209
def test_combine():
207210
result = combine(
208-
"app/tests/test_files/aligned_transcript.json",
209-
"app/tests/test_files/diarazition.json",
211+
"tests/test_files/aligned_transcript.json",
212+
"tests/test_files/diarazition.json",
210213
)
211214

212215
assert result["segments"][0]["text"].startswith(TRANSCRIPT_RESULT_1) or result[
213216
"segments"
214217
][0]["text"].startswith(TRANSCRIPT_RESULT_2)
215218

216219

220+
@pytest.mark.skipif(os.getenv("DEVICE") == "cpu", reason="Test requires GPU")
217221
def test_speech_to_text_url():
218222
# There is sometimes issue with CUDA memory better run this test individually
219223
response = client.post(
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)