From 102a969ba98e08d69a96d585088ceb5edb6868c4 Mon Sep 17 00:00:00 2001 From: KOUNOIKE Yuusuke Date: Tue, 18 Apr 2023 21:16:29 +0000 Subject: [PATCH] =?UTF-8?q?=E6=96=87=E5=AD=97=E8=B5=B7=E3=81=93=E3=81=97?= =?UTF-8?q?=E9=80=9F=E5=BA=A6=E3=81=AE=E6=94=B9=E5=96=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker-compose/worker/run_whisper.py | 2 +- tasks/transcription_local_task.go | 2 +- whisper/run_whisper.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker-compose/worker/run_whisper.py b/docker-compose/worker/run_whisper.py index 60deda9..ffbb9d8 100644 --- a/docker-compose/worker/run_whisper.py +++ b/docker-compose/worker/run_whisper.py @@ -17,7 +17,7 @@ # or run on CPU with INT8 # model = WhisperModel(model_size, device="cpu", compute_type="int8") -segments, info = model.transcribe(sys.argv[2], beam_size=5, language="ja", initial_prompt=initial_prompt) +segments, info = model.transcribe(sys.argv[2], beam_size=5, language="ja", initial_prompt=initial_prompt, vad_filter=True) #print("Detected language '%s' with probability %f" % (info.language, info.language_probability)) diff --git a/tasks/transcription_local_task.go b/tasks/transcription_local_task.go index ac056dd..5bd76d3 100644 --- a/tasks/transcription_local_task.go +++ b/tasks/transcription_local_task.go @@ -77,7 +77,7 @@ func (e *ProgramTranscriberLocal) ProcessTask(ctx context.Context, t *asynq.Task } tmpFile := fmt.Sprintf("/tmp/%d.wav", p.ProgramId) - commandLine := fmt.Sprintf(`ffmpeg -hide_banner -i "%s" -vn "%s" -y`, inputFile, tmpFile) + commandLine := fmt.Sprintf(`ffmpeg -hide_banner -i "%s" -ac 1 -ar 16000 -vn -sn "%s" -y`, inputFile, tmpFile) e.logger.Info("Running split audio command", zap.String("command", commandLine)) diff --git a/whisper/run_whisper.py b/whisper/run_whisper.py index ed502a2..89aa3d4 100644 --- a/whisper/run_whisper.py +++ b/whisper/run_whisper.py @@ -18,7 +18,7 @@ # or run on CPU with INT8 # model = WhisperModel(model_size, device="cpu", compute_type="int8") -segments, info = model.transcribe(sys.argv[2], beam_size=5, language="ja", initial_prompt=initial_prompt) +segments, info = model.transcribe(sys.argv[2], beam_size=5, language="ja", initial_prompt=initial_prompt, vad_filter=True) #print("Detected language '%s' with probability %f" % (info.language, info.language_probability))