diff --git a/.github/workflows/docker-build-and-push.yaml b/.github/workflows/docker-build-and-push.yaml
index 34e8622c..963ca2f2 100644
--- a/.github/workflows/docker-build-and-push.yaml
+++ b/.github/workflows/docker-build-and-push.yaml
@@ -16,9 +16,9 @@ jobs:
         dockerfile: [Dockerfile.cuda, Dockerfile.cpu]
         include:
           - dockerfile: Dockerfile.cuda
-            tag-prefix: cuda-
+            tag-suffix: -cuda
           - dockerfile: Dockerfile.cpu
-            tag-prefix: cpu-
+            tag-suffix: -cpu
     steps:
       - uses: actions/checkout@v4
      - name: Login to Docker Hub
@@ -33,7 +33,8 @@ jobs:
             fedirz/faster-whisper-server
           # https://github.com/docker/metadata-action?tab=readme-ov-file#flavor-input
           flavor: |
-            prefix=${{ matrix.tag-prefix }}
+            latest=false
+            suffix=${{ matrix.tag-suffix }}
           tags: |
             type=semver,pattern={{version}}
             type=semver,pattern={{major}}.{{minor}}
diff --git a/Dockerfile.cpu b/Dockerfile.cpu
index f7f7a8de..17d34a46 100644
--- a/Dockerfile.cpu
+++ b/Dockerfile.cpu
@@ -15,7 +15,7 @@ RUN poetry install --only main
 COPY ./faster_whisper_server ./faster_whisper_server
 ENTRYPOINT ["poetry", "run"]
 CMD ["uvicorn", "faster_whisper_server.main:app"]
-ENV WHISPER_MODEL=distil-medium.en
+ENV WHISPER_MODEL=medium.en
 ENV WHISPER_INFERENCE_DEVICE=cpu
 ENV WHISPER_COMPUTE_TYPE=int8
 ENV UVICORN_HOST=0.0.0.0
diff --git a/README.md b/README.md
index 9d21ed90..e6d75ad4 100644
--- a/README.md
+++ b/README.md
@@ -60,10 +60,10 @@ print(transcript.text)
 # If `model` isn't specified, the default model is used
 curl http://localhost:8000/v1/audio/transcriptions -F "file=@audio.wav"
 curl http://localhost:8000/v1/audio/transcriptions -F "file=@audio.mp3"
-curl http://localhost:8000/v1/audio/transcriptions -F "file=@audio.wav" -F "streaming=true"
-curl http://localhost:8000/v1/audio/transcriptions -F "file=@audio.wav" -F "streaming=true" -F "model=distil-large-v3"
+curl http://localhost:8000/v1/audio/transcriptions -F "file=@audio.wav" -F "stream=true"
+curl http://localhost:8000/v1/audio/transcriptions -F "file=@audio.wav" -F "stream=true" -F "model=distil-large-v3"
 # It's recommended that you always specify the language as that will reduce the transcription time
-curl http://localhost:8000/v1/audio/transcriptions -F "file=@audio.wav" -F "streaming=true" -F "model=distil-large-v3" -F "language=en"
+curl http://localhost:8000/v1/audio/transcriptions -F "file=@audio.wav" -F "stream=true" -F "model=distil-large-v3" -F "language=en"
 curl http://localhost:8000/v1/audio/translations -F "file=@audio.wav"
 ```
diff --git a/faster_whisper_server/config.py b/faster_whisper_server/config.py
index 0768a77e..e68c7d01 100644
--- a/faster_whisper_server/config.py
+++ b/faster_whisper_server/config.py
@@ -163,7 +163,7 @@ class Language(enum.StrEnum):


 class WhisperConfig(BaseModel):
-    model: Model = Field(default=Model.DISTIL_MEDIUM_EN)
+    model: Model = Field(default=Model.MEDIUM_EN)
     inference_device: Device = Field(default=Device.AUTO)
     compute_type: Quantization = Field(default=Quantization.DEFAULT)
diff --git a/faster_whisper_server/main.py b/faster_whisper_server/main.py
index 2aad33d5..3ca17a64 100644
--- a/faster_whisper_server/main.py
+++ b/faster_whisper_server/main.py
@@ -58,7 +58,7 @@ def load_model(model_name: Model) -> WhisperModel:
         compute_type=config.whisper.compute_type,
     )
     logger.info(
-        f"Loaded {model_name} loaded in {time.perf_counter() - start:.2f} seconds"
+        f"Loaded {model_name} in {time.perf_counter() - start:.2f} seconds. {config.whisper.inference_device}({config.whisper.compute_type}) will be used for inference."
     )
     models[model_name] = whisper
     return whisper
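
Note on the tag flavor change: here is a minimal, illustrative Python sketch of the tags the new `suffix` flavor should produce for a `v1.2.3` release. This is not `docker/metadata-action`'s actual implementation, and `rendered_tags` is a hypothetical helper introduced only for illustration.

```python
# Illustrative sketch only -- approximates how the two `type=semver` tag
# patterns in the workflow combine with the new `suffix` flavor.
def rendered_tags(version: str, suffix: str) -> list[str]:
    major, minor, _patch = version.split(".")
    # The workflow's patterns: {{version}} and {{major}}.{{minor}}
    patterns = [version, f"{major}.{minor}"]
    return [f"fedirz/faster-whisper-server:{p}{suffix}" for p in patterns]

print(rendered_tags("1.2.3", "-cuda"))
# ['fedirz/faster-whisper-server:1.2.3-cuda', 'fedirz/faster-whisper-server:1.2-cuda']
```

So images move from the `cuda-1.2.3` form to the more conventional `1.2.3-cuda`. The `latest=false` line is presumably there because both matrix jobs would otherwise publish a plain `latest` tag and overwrite each other's image.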