Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bump whisper, clblast, add buffered output #90

Merged
merged 6 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/scripts/Package-Windows.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ param(
[string] $Target = 'x64',
[ValidateSet('Debug', 'RelWithDebInfo', 'Release', 'MinSizeRel')]
[string] $Configuration = 'RelWithDebInfo',
[ValidateSet('cpu', '12.2.0', '11.8.0')]
[ValidateSet('cpu', 'clblast', '12.2.0', '11.8.0')]
[string] $Cublas = 'cpu',
[switch] $BuildInstaller,
[switch] $SkipDeps
Expand Down Expand Up @@ -52,6 +52,8 @@ function Package {
# Derive $CudaName from $Cublas: 'cpu' and 'clblast' are used verbatim,
# any other accepted value is treated as a CUDA version and prefixed with 'cuda'.
# The comparison must use 'clblast' (the spelling accepted by the ValidateSet on
# the $Cublas parameter); a misspelled literal here would make the branch
# unreachable and send clblast builds down the CUDA path as "cudaclblast".
if ( $Cublas -eq 'cpu' ) {
    $CudaName = 'cpu'
} elseif ( $Cublas -eq 'clblast' ) {
    $CudaName = 'clblast'
} else {
    $CudaName = "cuda${Cublas}"
}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-project.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ jobs:
needs: check-event
strategy:
matrix:
cublas: [cpu, 12.2.0, 11.8.0]
cublas: [cpu, clblast, 12.2.0, 11.8.0]
defaults:
run:
shell: pwsh
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ jobs:

variants=(
'windows-x64-cpu;zip|exe'
'windows-x64-clblast;zip|exe'
'windows-x64-11.8.0;zip|exe'
'windows-x64-12.2.0;zip|exe'
'macos-arm64;tar.xz|pkg'
Expand Down
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ target_sources(
src/whisper-utils/whisper-processing.cpp
src/whisper-utils/whisper-utils.cpp
src/whisper-utils/silero-vad-onnx.cpp
src/translation/translation.cpp)
src/translation/translation.cpp
src/utils.cpp)

set_target_properties_plugin(${CMAKE_PROJECT_NAME} PROPERTIES OUTPUT_NAME ${_name})
5 changes: 3 additions & 2 deletions cmake/BuildCTranslate2.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ elseif(WIN32)

# check CPU_OR_CUDA environment variable
if(NOT DEFINED ENV{CPU_OR_CUDA})
message(FATAL_ERROR "Please set the CPU_OR_CUDA environment variable to either CPU or CUDA")
message(
FATAL_ERROR "Please set the CPU_OR_CUDA environment variable to either `cpu`, `clblast`, `12.2.0` or `11.8.0`")
endif()

if($ENV{CPU_OR_CUDA} STREQUAL "cpu")
if($ENV{CPU_OR_CUDA} STREQUAL "cpu" OR $ENV{CPU_OR_CUDA} STREQUAL "clblast")
FetchContent_Declare(
ctranslate2_fetch
URL https://github.com/occ-ai/obs-ai-ctranslate2-dep/releases/download/1.2.0/libctranslate2-windows-4.1.1-Release-cpu.zip
Expand Down
29 changes: 18 additions & 11 deletions cmake/BuildWhispercpp.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ if(APPLE)
endif(NOT DEFINED ENV{MACOS_ARCH})

set(WHISPER_CPP_URL
"https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.1/whispercpp-macos-$ENV{MACOS_ARCH}-0.0.1.tar.gz"
"https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.2/whispercpp-macos-$ENV{MACOS_ARCH}-0.0.2.tar.gz"
)
if($ENV{MACOS_ARCH} STREQUAL "x86_64")
set(WHISPER_CPP_HASH "36F39F02F999AAF157EAD3460DD00C8BDAA3D6C4A769A9E4F64E327871B4B11F")
set(WHISPER_CPP_HASH "00C308AF0BFFF7619934403A8080CC9AFC4EDAA328D7587E617150A2C6A33313")
elseif($ENV{MACOS_ARCH} STREQUAL "arm64")
set(WHISPER_CPP_HASH "6AF7BB904B03B6208B4281D44465B727FB608A32CABD1394B727937C5F4828A1")
set(WHISPER_CPP_HASH "0478E2079E07FA81BEE77506101003F4A4C8F0DF9E23757BD7E1D25DCBD1DB30")
else()
message(
FATAL_ERROR
Expand All @@ -45,24 +45,31 @@ elseif(WIN32)
if(NOT DEFINED ENV{CPU_OR_CUDA})
message(
FATAL_ERROR
"The CPU_OR_CUDA environment variable is not set. Please set it to either `cpu` or `11.8.0` or `12.2.0`")
"The CPU_OR_CUDA environment variable is not set. Please set it to either `cpu`, `clblast` or `11.8.0` or `12.2.0`"
)
endif(NOT DEFINED ENV{CPU_OR_CUDA})

set(CUDA_PREFIX $ENV{CPU_OR_CUDA})
if(NOT $ENV{CPU_OR_CUDA} STREQUAL "cpu")
set(CUDA_PREFIX "cuda$ENV{CPU_OR_CUDA}")
set(ARCH_PREFIX $ENV{CPU_OR_CUDA})
if(NOT $ENV{CPU_OR_CUDA} STREQUAL "cpu" AND NOT $ENV{CPU_OR_CUDA} STREQUAL "clblast")
set(ARCH_PREFIX "cuda$ENV{CPU_OR_CUDA}")
add_compile_definitions("LOCALVOCAL_WITH_CUDA")
elseif($ENV{CPU_OR_CUDA} STREQUAL "cpu")
add_compile_definitions("LOCALVOCAL_WITH_CPU")
else()
add_compile_definitions("LOCALVOCAL_WITH_CLBLAST")
endif()

set(WHISPER_CPP_URL
"https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.1/whispercpp-windows-${CUDA_PREFIX}-0.0.1.zip"
"https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.2/whispercpp-windows-${ARCH_PREFIX}-0.0.2.zip"
)
if($ENV{CPU_OR_CUDA} STREQUAL "cpu")
set(WHISPER_CPP_HASH "5261FCCD18BA52AE7ECD37617452F0514238FAB4B12713F1FCA491F4ABA170AA")
set(WHISPER_CPP_HASH "6DE628A51B9352624A1EC397231591FA3370E6BB42D9364F4F91F11DD18F77D2")
elseif($ENV{CPU_OR_CUDA} STREQUAL "clblast")
set(WHISPER_CPP_HASH "97BF58520F1818B7C9F4E996197F3097934E5E0BBA92B0B016C6B28BE9FF1642")
elseif($ENV{CPU_OR_CUDA} STREQUAL "12.2.0")
set(WHISPER_CPP_HASH "1966A6C7347FCB9529140F8097AED306F31C6DDE328836FD6498A980E20B8E6C")
set(WHISPER_CPP_HASH "48C059A3364E0AAD9FB0D4194BA554865928D22A27ECE5E3C116DC672D5D6EDE")
elseif($ENV{CPU_OR_CUDA} STREQUAL "11.8.0")
set(WHISPER_CPP_HASH "172F4021E888A89A694373AE0888C04DB99BC11F3A2633270248E03AF5AC762E")
set(WHISPER_CPP_HASH "29A5530E83896DE207F0199535CBBB24DF0D63B1373BA66139AD240BA67120EB")
else()
message(
FATAL_ERROR
Expand Down
2 changes: 2 additions & 0 deletions data/locale/ar-SA.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="لغة المصدر"
translate="ترجمة (⚠️ زيادة المعالجة)"
translate_add_context="الترجمة مع السياق"
whisper_translate="ترجمة إلى الإنجليزية (Whisper)"
buffer_size_msec="حجم الذاكرة المؤقتة (ملي ثانية)"
overlap_size_msec="حجم التداخل (ملي ثانية)"
2 changes: 2 additions & 0 deletions data/locale/de-DE.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="Quellsprache"
translate="Übersetzen (⚠️ erhöhte Verarbeitung)"
translate_add_context="Mit Kontext übersetzen"
whisper_translate="Ins Englische übersetzen (Flüstern)"
buffer_size_msec="Puffergröße (ms)"
overlap_size_msec="Überlappungsgröße (ms)"
4 changes: 3 additions & 1 deletion data/locale/en-US.ini
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ text_file_output="Text File output"
output_filename="Output filename"
whisper_model="Whisper Model"
external_model_file="External model file"
whisper_parameters="Advanced Settings"
whisper_parameters="Whisper Model Parameters"
language="Language"
whisper_sampling_method="Whisper Sampling Method"
n_threads="Number of threads"
Expand Down Expand Up @@ -49,3 +49,5 @@ source_language="Source language"
translate="Translate (⚠️ increased processing)"
translate_add_context="Translate with context"
whisper_translate="Translate to English (Whisper)"
buffer_size_msec="Buffer size (ms)"
overlap_size_msec="Overlap size (ms)"
2 changes: 2 additions & 0 deletions data/locale/es-ES.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="Idioma fuente"
translate="Traducir (⚠️ procesamiento aumentado)"
translate_add_context="Traducir con contexto"
whisper_translate="Traducir al inglés (Whisper)"
buffer_size_msec="Tamaño del búfer (ms)"
overlap_size_msec="Tamaño de superposición (ms)"
2 changes: 2 additions & 0 deletions data/locale/fr-FR.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="Langue source"
translate="Traduire (⚠️ traitement accru)"
translate_add_context="Traduire avec contexte"
whisper_translate="Traduire en anglais (Whisper)"
buffer_size_msec="Taille du tampon (ms)"
overlap_size_msec="Taille de chevauchement (ms)"
2 changes: 2 additions & 0 deletions data/locale/hi-IN.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="स्रोत भाषा"
translate="अनुवाद करें (⚠️ बढ़ी प्रसंस्करण)"
translate_add_context="संदर्भ के साथ अनुवाद करें"
whisper_translate="अंग्रेजी में अनुवाद करें (व्हिस्पर)"
buffer_size_msec="बफ़र आकार (ms)"
overlap_size_msec="ओवरलैप आकार (ms)"
2 changes: 2 additions & 0 deletions data/locale/ja-JP.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="ソース言語"
translate="翻訳 (⚠️処理増加)"
translate_add_context="コンテキスト付きで翻訳"
whisper_translate="英語に翻訳(ウィスパー)"
buffer_size_msec="バッファサイズ(ms)"
overlap_size_msec="オーバーラップサイズ(ms)"
2 changes: 2 additions & 0 deletions data/locale/ko-KR.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="원본 언어"
translate="번역 (⚠️ 처리 시간 증가)"
translate_add_context="컨텍스트와 함께 번역"
whisper_translate="영어로 번역 (속삭임)"
buffer_size_msec="버퍼 크기 (ms)"
overlap_size_msec="오버랩 크기 (ms)"
2 changes: 2 additions & 0 deletions data/locale/pl-PL.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="Język źródłowy"
translate="Tłumacz (⚠️ zwiększone przetwarzanie)"
translate_add_context="Tłumacz z kontekstem"
whisper_translate="Tłumacz na angielski (Whisper)"
buffer_size_msec="Rozmiar bufora (ms)"
overlap_size_msec="Rozmiar nakładki (ms)"
2 changes: 2 additions & 0 deletions data/locale/pt-BR.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="Língua de origem"
translate="Traduzir (⚠️ o processamento aumentará)"
translate_add_context="Traduzir com contexto"
whisper_translate="Traduzir para inglês (Whisper)"
buffer_size_msec="Tamanho do buffer (ms)"
overlap_size_msec="Tamanho da sobreposição (ms)"
2 changes: 2 additions & 0 deletions data/locale/ru-RU.ini
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,5 @@ source_language="Исходный язык"
translate="Перевести (⚠️ обработка будет увеличена)"
translate_add_context="Перевести с контекстом"
whisper_translate="Перевести на английский (Whisper)"
buffer_size_msec="Размер буфера (мс)"
overlap_size_msec="Размер перекрытия (мс)"
2 changes: 2 additions & 0 deletions data/locale/zh-CN.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="源语言"
translate="翻译 (⚠️ 增加处理)"
translate_add_context="带上下文翻译"
whisper_translate="翻译为英语(Whisper)"
buffer_size_msec="缓冲区大小(毫秒)"
overlap_size_msec="重叠大小(毫秒)"
118 changes: 118 additions & 0 deletions src/captions-thread.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#ifndef CAPTIONS_THREAD_H
#define CAPTIONS_THREAD_H

#include <algorithm>
#include <chrono>
#include <condition_variable>
#include <deque>
#include <functional>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include <obs.h>

#include "plugin-support.h"

/**
 * Buffers words and emits them as a space-joined caption string from a
 * background worker thread.
 *
 * Usage: construct, call initialize() once to start the worker, then feed words
 * with addWords(). Every time addWords() is called the worker wakes up, joins up
 * to `maxSize` words from the front of the queue (each followed by a single
 * space) and passes the result to the callback. Words stay in the queue, and are
 * therefore re-emitted on the next wake-up, until either the queue holds at
 * least `maxSize` words or `maxTime` has elapsed since the last flush; at that
 * point the words that were just emitted are dropped from the queue.
 *
 * The callback is invoked on the worker thread, without the internal mutex held.
 */
class CaptionMonitor {
public:
	// default constructor
	CaptionMonitor() = default;

	// The worker thread captures `this`, so the object must never be copied.
	CaptionMonitor(const CaptionMonitor &) = delete;
	CaptionMonitor &operator=(const CaptionMonitor &) = delete;

	/// Stops and joins the worker thread (if one was ever started).
	~CaptionMonitor() { shutdown(); }

	/**
	 * Configure the monitor and start the worker thread.
	 *
	 * @param callback_ Invoked on the worker thread with the joined caption text.
	 * @param maxSize_  Maximum number of words per emitted caption. A value of 0
	 *                  is raised to 1, otherwise the queue could never be drained.
	 * @param maxTime_  Time since the last flush after which the emitted words are
	 *                  dropped even if fewer than `maxSize_` are queued.
	 *
	 * Calling this again stops the previous worker before starting a new one.
	 */
	void initialize(std::function<void(const std::string &)> callback_, size_t maxSize_,
			std::chrono::seconds maxTime_)
	{
		// Assigning to a joinable std::thread calls std::terminate, so make
		// sure any previous worker is gone first.
		shutdown();
		{
			std::lock_guard<std::mutex> lock(queueMutex);
			this->callback = std::move(callback_);
			this->maxSize = std::max<size_t>(maxSize_, 1);
			this->maxTime = maxTime_;
			this->stop = false;
			this->initialized = true;
		}
		this->workerThread = std::thread(&CaptionMonitor::monitor, this);
	}

	/**
	 * Append words to the queue and wake the worker.
	 *
	 * @param words Words to append, in order. May be empty; the worker is still
	 *              woken and re-emits whatever is currently queued.
	 */
	void addWords(const std::vector<std::string> &words)
	{
		{
			std::lock_guard<std::mutex> lock(queueMutex);
			wordQueue.insert(wordQueue.end(), words.begin(), words.end());
			this->newDataAvailable = true;
		}
		condVar.notify_all();
	}

private:
	/// Signal the worker to stop and join it. Safe to call when no worker runs.
	void shutdown()
	{
		{
			std::lock_guard<std::mutex> lock(queueMutex);
			stop = true;
		}
		condVar.notify_all();
		// join() on a thread that was never started throws std::system_error,
		// which would terminate the process when raised from the destructor.
		if (workerThread.joinable()) {
			workerThread.join();
		}
	}

	/// Worker loop: waits for new words, emits a caption, flushes when due.
	void monitor()
	{
		obs_log(LOG_INFO, "CaptionMonitor::monitor");
		auto startTime = std::chrono::steady_clock::now();
		while (true) {
			std::string output;
			{
				std::unique_lock<std::mutex> lock(this->queueMutex);
				// wait for new data or stop signal
				this->condVar.wait(lock, [this] {
					return this->newDataAvailable || this->stop;
				});

				if (this->stop) {
					break;
				}

				// Clear the flag before any early exit; leaving it set on the
				// empty-queue path would make the wait above return
				// immediately and spin the thread.
				this->newDataAvailable = false;

				if (this->wordQueue.empty()) {
					continue;
				}

				// emit up to maxSize words from the front of the queue,
				// joining the words with a space (trailing space included)
				const size_t emitCount =
					std::min(this->wordQueue.size(), this->maxSize);
				for (size_t i = 0; i < emitCount; ++i) {
					output += this->wordQueue[i];
					output += ' ';
				}

				const auto now = std::chrono::steady_clock::now();
				if (this->wordQueue.size() >= this->maxSize ||
				    now - startTime >= this->maxTime) {
					// flush the emitted words if the queue is full or
					// we've reached the max time
					this->wordQueue.erase(
						this->wordQueue.begin(),
						this->wordQueue.begin() +
							static_cast<std::ptrdiff_t>(emitCount));
					startTime = now;
				}
			}

			// Run the callback outside the lock so producers are not blocked
			// and a callback that calls addWords() cannot deadlock. `callback`
			// is only written by initialize() while no worker is running.
			if (this->callback) {
				this->callback(output);
			}
		}
		obs_log(LOG_INFO, "CaptionMonitor::monitor: done");
	}

	std::deque<std::string> wordQueue;
	std::thread workerThread;
	std::mutex queueMutex;
	std::condition_variable condVar;
	std::function<void(const std::string &)> callback;
	size_t maxSize = 0;
	std::chrono::seconds maxTime{0};
	bool stop = false;
	bool initialized = false;
	bool newDataAvailable = false;
};

#endif // CAPTIONS_THREAD_H
16 changes: 8 additions & 8 deletions src/model-utils/model-infos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ std::map<std::string, ModelInfo> models_info = {{
MODEL_TYPE_TRANSCRIPTION,
{{"https://ggml.ggerganov.com/ggml-model-whisper-base-q5_1.bin",
"422F1AE452ADE6F30A004D7E5C6A43195E4433BC370BF23FAC9CC591F01A8898"}}}},
{"Whisper Base En q5 (57Mb)",
{"Whisper Base English q5 (57Mb)",
{"Whisper Base En q5",
"ggml-model-whisper-base-en-q5_1",
MODEL_TYPE_TRANSCRIPTION,
Expand All @@ -41,7 +41,7 @@ std::map<std::string, ModelInfo> models_info = {{
MODEL_TYPE_TRANSCRIPTION,
{{"https://ggml.ggerganov.com/ggml-model-whisper-base.bin",
"60ED5BC3DD14EEA856493D334349B405782DDCAF0028D4B5DF4088345FBA2EFE"}}}},
{"Whisper Base En (141Mb)",
{"Whisper Base English (141Mb)",
{"Whisper Base En",
"ggml-model-whisper-base-en",
MODEL_TYPE_TRANSCRIPTION,
Expand All @@ -59,7 +59,7 @@ std::map<std::string, ModelInfo> models_info = {{
MODEL_TYPE_TRANSCRIPTION,
{{"https://ggml.ggerganov.com/ggml-model-whisper-medium-q5_0.bin",
"19FEA4B380C3A618EC4723C3EEF2EB785FFBA0D0538CF43F8F235E7B3B34220F"}}}},
{"Whisper Medium En q5 (514Mb)",
{"Whisper Medium English q5 (514Mb)",
{"Whisper Medium En q5",
"ggml-model-whisper-medium-en-q5_0",
MODEL_TYPE_TRANSCRIPTION,
Expand All @@ -71,7 +71,7 @@ std::map<std::string, ModelInfo> models_info = {{
MODEL_TYPE_TRANSCRIPTION,
{{"https://ggml.ggerganov.com/ggml-model-whisper-small-q5_1.bin",
"AE85E4A935D7A567BD102FE55AFC16BB595BDB618E11B2FC7591BC08120411BB"}}}},
{"Whisper Small En q5 (181Mb)",
{"Whisper Small English q5 (181Mb)",
{"Whisper Small En q5",
"ggml-model-whisper-small-en-q5_1",
MODEL_TYPE_TRANSCRIPTION,
Expand All @@ -83,7 +83,7 @@ std::map<std::string, ModelInfo> models_info = {{
MODEL_TYPE_TRANSCRIPTION,
{{"https://ggml.ggerganov.com/ggml-model-whisper-small.bin",
"1BE3A9B2063867B937E64E2EC7483364A79917E157FA98C5D94B5C1FFFEA987B"}}}},
{"Whisper Small En (465Mb)",
{"Whisper Small English (465Mb)",
{"Whisper Small En",
"ggml-model-whisper-small-en",
MODEL_TYPE_TRANSCRIPTION,
Expand All @@ -101,19 +101,19 @@ std::map<std::string, ModelInfo> models_info = {{
MODEL_TYPE_TRANSCRIPTION,
{{"https://ggml.ggerganov.com/ggml-model-whisper-tiny-q5_1.bin",
"818710568DA3CA15689E31A743197B520007872FF9576237BDA97BD1B469C3D7"}}}},
{"Whisper Tiny En q5 (31Mb)",
{"Whisper Tiny English q5 (31Mb)",
{"Whisper Tiny En q5",
"ggml-model-whisper-tiny-en-q5_1",
MODEL_TYPE_TRANSCRIPTION,
{{"https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q5_1.bin",
"C77C5766F1CEF09B6B7D47F21B546CBDDD4157886B3B5D6D4F709E91E66C7C2B"}}}},
{"Whisper Tiny En q8 (42Mb)",
{"Whisper Tiny English q8 (42Mb)",
{"Whisper Tiny En q8",
"ggml-model-whisper-tiny-en-q8_0",
MODEL_TYPE_TRANSCRIPTION,
{{"https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin",
"5BC2B3860AA151A4C6E7BB095E1FCCE7CF12C7B020CA08DCEC0C6D018BB7DD94"}}}},
{"Whisper Tiny En (74Mb)",
{"Whisper Tiny English (74Mb)",
{"Whisper Tiny En",
"ggml-model-whisper-tiny-en",
MODEL_TYPE_TRANSCRIPTION,
Expand Down
Loading
Loading