diff --git a/README.md b/README.md
index 2f79be4..479f6ac 100644
--- a/README.md
+++ b/README.md
@@ -76,6 +76,8 @@ WhisperX supports these model sizes:
 - `small`, `small.en`
 - `medium`, `medium.en`
 - `large`, `large-v1`, `large-v2`, `large-v3`, `large-v3-turbo`
+- Distilled models: `distil-large-v2`, `distil-medium.en`, `distil-small.en`, `distil-large-v3`
+- Custom models: [`nyrahealth/faster_CrisperWhisper`](https://github.com/nyrahealth/CrisperWhisper)
 
 Set default model in `.env` using `WHISPER_MODEL=` (default: tiny)
 
diff --git a/app/schemas.py b/app/schemas.py
index 4d53897..4a31f39 100644
--- a/app/schemas.py
+++ b/app/schemas.py
@@ -152,6 +152,11 @@ class WhisperModel(str, Enum):
     large_v2 = "large-v2"
     large_v3 = "large-v3"
     large_v3_turbo = "large-v3-turbo"
+    distil_large_v2 = "distil-large-v2"
+    distil_medium_en = "distil-medium.en"
+    distil_small_en = "distil-small.en"
+    distil_large_v3 = "distil-large-v3"
+    faster_crisper_whisper = "nyrahealth/faster_CrisperWhisper"
 
 
 class Device(str, Enum):
diff --git a/app/whisperx_services.py b/app/whisperx_services.py
index 1bbc44f..f693c11 100644
--- a/app/whisperx_services.py
+++ b/app/whisperx_services.py
@@ -71,7 +71,7 @@ def transcribe_with_whisper(
 
     logger.debug(
         "Loading model with config - model: %s, device: %s, compute_type: %s, threads: %d, task: %s, language: %s",
-        model,
+        model.value,
         device,
         compute_type,
         faster_whisper_threads,
@@ -79,7 +79,7 @@ def transcribe_with_whisper(
         language,
     )
     model = load_model(
-        model,
+        model.value,
         device,
         device_index=device_index,
         compute_type=compute_type,
@@ -90,7 +90,9 @@ def transcribe_with_whisper(
         threads=faster_whisper_threads,
     )
     logger.debug("Transcription model loaded successfully")
-    result = model.transcribe(audio=audio, batch_size=batch_size, chunk_size=chunk_size, language=language)
+    result = model.transcribe(
+        audio=audio, batch_size=batch_size, chunk_size=chunk_size, language=language
+    )
 
     # Log GPU memory before cleanup
     if torch.cuda.is_available():
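A note for reviewers on why the `whisperx_services.py` hunks switch from `model` to `model.value`: `WhisperModel` mixes `str` into `Enum`, so `%s`-formatting the member prints its qualified name (`WhisperModel.distil_large_v3`) instead of the model identifier, and `.value` also makes the string handed to `load_model` explicit rather than relying on the `str`-subclass behavior. A minimal sketch of that behavior (enum trimmed to three members for illustration, not the full class from `app/schemas.py`):

```python
from enum import Enum


class WhisperModel(str, Enum):
    # Illustrative subset of the members defined in app/schemas.py.
    tiny = "tiny"
    distil_large_v3 = "distil-large-v3"
    faster_crisper_whisper = "nyrahealth/faster_CrisperWhisper"


m = WhisperModel.distil_large_v3

# str()/%s on a str-mixin Enum renders the member's qualified name,
# not its value, which is what made the old debug log misleading:
print("model: %s" % m)        # model: WhisperModel.distil_large_v3
print("model: %s" % m.value)  # model: distil-large-v3

# Parsing the raw string (e.g. from WHISPER_MODEL in .env) still resolves
# to the enum member, including the new distil and custom entries:
assert WhisperModel("distil-large-v3") is WhisperModel.distil_large_v3
```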