diff --git a/.github/workflows/build-project.yaml b/.github/workflows/build-project.yaml index cbe9fa6..91cabe1 100644 --- a/.github/workflows/build-project.yaml +++ b/.github/workflows/build-project.yaml @@ -115,9 +115,9 @@ jobs: id: ccache-cache with: path: ${{ github.workspace }}/.ccache - key: ${{ runner.os }}-ccache-${{ needs.check-event.outputs.config }} + key: ${{ runner.os }}-ccache-${{ matrix.architecture }}-${{ needs.check-event.outputs.config }} restore-keys: | - ${{ runner.os }}-ccache- + ${{ runner.os }}-ccache-${{ matrix.architecture }}- - name: Set Up Codesigning 🔑 uses: ./.github/actions/setup-macos-codesigning diff --git a/README.md b/README.md index 3c7a705..ac4be39 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,10 @@ [![Discord](https://img.shields.io/discord/1200229425141252116)](https://discord.gg/KbjGU2vvUz)
Download:
- - - - + + + + @@ -85,6 +85,20 @@ Check out our other plugins: ## Download Check out the [latest releases](https://github.com/locaal-ai/obs-localvocal/releases) for downloads and install instructions. +### Available Versions + +LocalVocal is available in multiple versions to cater to different hardware configurations and operating systems. Below is a brief explanation of the different versions you can download: + +- **Windows CUDA**: This version is optimized for systems with NVIDIA GPUs and utilizes CUDA for accelerated performance. Make sure you have the latest NVIDIA GPU drivers installed. +- **Windows CPU**: This version is designed for systems without dedicated GPUs, running solely on the CPU. +- **Windows HIPBLAS**: This version uses AMD's HIP framework to accelerate computation on AMD GPUs. (⚠️ Experimental ⚠️ Please provide feedback) +- **Windows Vulkan**: This version uses Vulkan for GPU-based acceleration on GPUs from many vendors, such as NVIDIA, AMD, and Intel. (⚠️ Experimental ⚠️ Please provide feedback) +- **macOS Intel (x86_64)**: This version is for Mac computers with Intel processors. +- **macOS Apple Silicon (arm64)**: This version is optimized for Mac computers with Apple Silicon (M1, M2, etc.) processors. +- **Linux x86_64**: This version is for Linux systems with x86_64 architecture. + +Make sure to download the version that matches your system's hardware and operating system for the best performance. + ### Models The plugin ships with the Tiny.en model, and will autonomously download other Whisper models through a dropdown. There's also an option to select an external GGML Whisper model file if you have it on disk. 
diff --git a/buildspec.json b/buildspec.json index ad62f83..b77b017 100644 --- a/buildspec.json +++ b/buildspec.json @@ -38,7 +38,7 @@ }, "name": "obs-localvocal", "displayName": "OBS Localvocal", - "version": "0.3.8", + "version": "0.3.9", "author": "Roy Shilkrot", "website": "https://github.com/locaal-ai/obs-localvocal", "email": "roy.shil@gmail.com", diff --git a/cmake/BuildCTranslate2.cmake b/cmake/BuildCTranslate2.cmake index 0d60561..b833f32 100644 --- a/cmake/BuildCTranslate2.cmake +++ b/cmake/BuildCTranslate2.cmake @@ -50,6 +50,14 @@ elseif(WIN32) file(GLOB CT2_DLLS ${ctranslate2_fetch_SOURCE_DIR}/bin/*.dll) install(FILES ${CT2_DLLS} DESTINATION "obs-plugins/64bit") else() + # Enable ccache if available + find_program(CCACHE_PROGRAM ccache) + if(CCACHE_PROGRAM) + message(STATUS "Found ccache: ${CCACHE_PROGRAM}") + set(CMAKE_C_COMPILER_LAUNCHER ${CCACHE_PROGRAM}) + set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_PROGRAM}) + endif() + # build cpu_features from source set(CPU_FEATURES_VERSION "0.9.0") set(CPU_FEATURES_URL "https://github.com/google/cpu_features.git") diff --git a/cmake/BuildSentencepiece.cmake b/cmake/BuildSentencepiece.cmake index 024283e..1fa3d41 100644 --- a/cmake/BuildSentencepiece.cmake +++ b/cmake/BuildSentencepiece.cmake @@ -27,6 +27,14 @@ elseif(WIN32) else() + # Enable ccache if available + find_program(CCACHE_PROGRAM ccache) + if(CCACHE_PROGRAM) + message(STATUS "Found ccache: ${CCACHE_PROGRAM}") + set(CMAKE_C_COMPILER_LAUNCHER ${CCACHE_PROGRAM}) + set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_PROGRAM}) + endif() + set(SP_URL "https://github.com/google/sentencepiece.git" CACHE STRING "URL of sentencepiece repository") diff --git a/cmake/BuildWhispercpp.cmake b/cmake/BuildWhispercpp.cmake index b51332e..67d22b4 100644 --- a/cmake/BuildWhispercpp.cmake +++ b/cmake/BuildWhispercpp.cmake @@ -102,6 +102,14 @@ elseif(WIN32) file(GLOB WHISPER_DLLS ${whispercpp_fetch_SOURCE_DIR}/bin/*.dll) install(FILES ${WHISPER_DLLS} DESTINATION "obs-plugins/64bit") 
else() + # Enable ccache if available + find_program(CCACHE_PROGRAM ccache) + if(CCACHE_PROGRAM) + message(STATUS "Found ccache: ${CCACHE_PROGRAM}") + set(CMAKE_C_COMPILER_LAUNCHER ${CCACHE_PROGRAM}) + set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_PROGRAM}) + endif() + if(${CMAKE_BUILD_TYPE} STREQUAL Release OR ${CMAKE_BUILD_TYPE} STREQUAL RelWithDebInfo) set(Whispercpp_BUILD_TYPE Release) else() diff --git a/data/locale/en-US.ini b/data/locale/en-US.ini index 0d326fb..a2827fd 100644 --- a/data/locale/en-US.ini +++ b/data/locale/en-US.ini @@ -131,4 +131,8 @@ Deepl-Translate="Deepl" Bing-Translate="Bing Translate" OpenAI-Translate="OpenAI" Claude-Translate="Claude" +API-Translate="Custom API" translate_cloud_deepl_free="Use Deepl Free API Endpoint" +translate_cloud_endpoint="API Endpoint" +translate_cloud_body="API Body" +translate_cloud_response_json_path="Response JSON Path" diff --git a/src/transcription-filter-callbacks.cpp b/src/transcription-filter-callbacks.cpp index 98be9c7..044a5bd 100644 --- a/src/transcription-filter-callbacks.cpp +++ b/src/transcription-filter-callbacks.cpp @@ -91,7 +91,8 @@ void send_sentence_to_cloud_translation_async(const std::string &sentence, gf->last_text_for_cloud_translation = sentence; if (gf->translate_cloud && !sentence.empty()) { obs_log(gf->log_level, "Translating text with cloud provider %s. 
%s -> %s", - gf->translate_cloud_provider.c_str(), source_language.c_str(), + gf->translate_cloud_config.provider.c_str(), + source_language.c_str(), gf->translate_cloud_target_language.c_str()); std::string translated_text; if (sentence == last_text) { @@ -99,14 +100,8 @@ void send_sentence_to_cloud_translation_async(const std::string &sentence, callback(gf->last_text_cloud_translation); return; } - CloudTranslatorConfig config; - config.provider = gf->translate_cloud_provider; - config.access_key = gf->translate_cloud_api_key; - config.secret_key = gf->translate_cloud_secret_key; - config.free = gf->translate_cloud_deepl_free; - config.region = gf->translate_cloud_region; - - translated_text = translate_cloud(config, sentence, + + translated_text = translate_cloud(gf->translate_cloud_config, sentence, gf->translate_cloud_target_language, source_language); if (!translated_text.empty()) { diff --git a/src/transcription-filter-data.h b/src/transcription-filter-data.h index 2134e1c..8201c50 100644 --- a/src/transcription-filter-data.h +++ b/src/transcription-filter-data.h @@ -19,6 +19,7 @@ #include "whisper-utils/silero-vad-onnx.h" #include "whisper-utils/whisper-processing.h" #include "whisper-utils/token-buffer-thread.h" +#include "translation/cloud-translation/translation-cloud.h" #define MAX_PREPROC_CHANNELS 10 @@ -92,16 +93,12 @@ struct transcription_filter_data { // Cloud translation options bool translate_cloud = false; - std::string translate_cloud_provider; + CloudTranslatorConfig translate_cloud_config; std::string translate_cloud_target_language; std::string translate_cloud_output; - std::string translate_cloud_api_key; - std::string translate_cloud_secret_key; bool translate_cloud_only_full_sentences = true; std::string last_text_for_cloud_translation; std::string last_text_cloud_translation; - bool translate_cloud_deepl_free; - std::string translate_cloud_region; // Transcription context sentences int n_context_sentences; diff --git 
a/src/transcription-filter-properties.cpp b/src/transcription-filter-properties.cpp index cce2666..70c7f1e 100644 --- a/src/transcription-filter-properties.cpp +++ b/src/transcription-filter-properties.cpp @@ -50,6 +50,9 @@ bool translation_cloud_provider_selection_callback(obs_properties_t *props, obs_ { UNUSED_PARAMETER(p); const char *provider = obs_data_get_string(s, "translate_cloud_provider"); + // show the access key for all except the custom provider + obs_property_set_visible(obs_properties_get(props, "translate_cloud_api_key"), + strcmp(provider, "api") != 0); obs_property_set_visible(obs_properties_get(props, "translate_cloud_deepl_free"), strcmp(provider, "deepl") == 0); // show the secret key input for the papago provider only @@ -58,6 +61,14 @@ bool translation_cloud_provider_selection_callback(obs_properties_t *props, obs_ // show the region input for the azure provider only obs_property_set_visible(obs_properties_get(props, "translate_cloud_region"), strcmp(provider, "azure") == 0); + // show the endpoint and body input for the custom provider only + obs_property_set_visible(obs_properties_get(props, "translate_cloud_endpoint"), + strcmp(provider, "api") == 0); + obs_property_set_visible(obs_properties_get(props, "translate_cloud_body"), + strcmp(provider, "api") == 0); + // show the response json path input for the custom provider only + obs_property_set_visible(obs_properties_get(props, "translate_cloud_response_json_path"), + strcmp(provider, "api") == 0); return true; } @@ -67,10 +78,12 @@ bool translation_cloud_options_callback(obs_properties_t *props, obs_property_t UNUSED_PARAMETER(property); // Show/Hide the cloud translation group options const bool translate_enabled = obs_data_get_bool(settings, "translate_cloud"); - for (const auto &prop : {"translate_cloud_provider", "translate_cloud_target_language", - "translate_cloud_output", "translate_cloud_api_key", - "translate_cloud_only_full_sentences", - "translate_cloud_secret_key", 
"translate_cloud_deepl_free"}) { + for (const auto &prop : + {"translate_cloud_provider", "translate_cloud_target_language", + "translate_cloud_output", "translate_cloud_api_key", + "translate_cloud_only_full_sentences", "translate_cloud_secret_key", + "translate_cloud_deepl_free", "translate_cloud_region", "translate_cloud_endpoint", + "translate_cloud_body", "translate_cloud_response_json_path"}) { obs_property_set_visible(obs_properties_get(props, prop), translate_enabled); } if (translate_enabled) { @@ -259,6 +272,7 @@ void add_translation_cloud_group_properties(obs_properties_t *ppts) "openai"); obs_property_list_add_string(prop_translate_cloud_provider, MT_("Claude-Translate"), "claude"); + obs_property_list_add_string(prop_translate_cloud_provider, MT_("API-Translate"), "api"); // add callback to show/hide the free API option for deepl obs_property_set_modified_callback(prop_translate_cloud_provider, @@ -298,6 +312,16 @@ void add_translation_cloud_group_properties(obs_properties_t *ppts) // add translate_cloud_region for azure obs_properties_add_text(translation_cloud_group, "translate_cloud_region", MT_("translate_cloud_region"), OBS_TEXT_DEFAULT); + + // add input for API endpoint + obs_properties_add_text(translation_cloud_group, "translate_cloud_endpoint", + MT_("translate_cloud_endpoint"), OBS_TEXT_DEFAULT); + // add input for API body + obs_properties_add_text(translation_cloud_group, "translate_cloud_body", + MT_("translate_cloud_body"), OBS_TEXT_MULTILINE); + // add input for json response path + obs_properties_add_text(translation_cloud_group, "translate_cloud_response_json_path", + MT_("translate_cloud_response_json_path"), OBS_TEXT_DEFAULT); } void add_translation_group_properties(obs_properties_t *ppts) @@ -667,6 +691,12 @@ void transcription_filter_defaults(obs_data_t *s) obs_data_set_default_string(s, "translate_cloud_secret_key", ""); obs_data_set_default_bool(s, "translate_cloud_deepl_free", true); obs_data_set_default_string(s, 
"translate_cloud_region", "eastus"); + obs_data_set_default_string(s, "translate_cloud_endpoint", + "http://localhost:5000/translate"); + obs_data_set_default_string( + s, "translate_cloud_body", + "{\n\t\"text\":\"{{sentence}}\",\n\t\"target\":\"{{target_lang}}\"\n}"); + obs_data_set_default_string(s, "translate_cloud_response_json_path", "translations.0.text"); // Whisper parameters apply_whisper_params_defaults_on_settings(s); diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index ebd765d..0f802d4 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -333,16 +333,21 @@ void transcription_filter_update(void *data, obs_data_t *s) } gf->translate_cloud = obs_data_get_bool(s, "translate_cloud"); - gf->translate_cloud_provider = obs_data_get_string(s, "translate_cloud_provider"); + gf->translate_cloud_config.provider = obs_data_get_string(s, "translate_cloud_provider"); gf->translate_cloud_target_language = obs_data_get_string(s, "translate_cloud_target_language"); gf->translate_cloud_output = obs_data_get_string(s, "translate_cloud_output"); gf->translate_cloud_only_full_sentences = obs_data_get_bool(s, "translate_cloud_only_full_sentences"); - gf->translate_cloud_api_key = obs_data_get_string(s, "translate_cloud_api_key"); - gf->translate_cloud_secret_key = obs_data_get_string(s, "translate_cloud_secret_key"); - gf->translate_cloud_deepl_free = obs_data_get_bool(s, "translate_cloud_deepl_free"); - gf->translate_cloud_region = obs_data_get_string(s, "translate_cloud_region"); + gf->translate_cloud_config.access_key = obs_data_get_string(s, "translate_cloud_api_key"); + gf->translate_cloud_config.secret_key = + obs_data_get_string(s, "translate_cloud_secret_key"); + gf->translate_cloud_config.free = obs_data_get_bool(s, "translate_cloud_deepl_free"); + gf->translate_cloud_config.region = obs_data_get_string(s, "translate_cloud_region"); + gf->translate_cloud_config.endpoint = obs_data_get_string(s,
"translate_cloud_endpoint"); + gf->translate_cloud_config.body = obs_data_get_string(s, "translate_cloud_body"); + gf->translate_cloud_config.response_json_path = + obs_data_get_string(s, "translate_cloud_response_json_path"); obs_log(gf->log_level, "update text source"); // update the text source diff --git a/src/translation/cloud-translation/CMakeLists.txt b/src/translation/cloud-translation/CMakeLists.txt index d6bb1af..70e39bc 100644 --- a/src/translation/cloud-translation/CMakeLists.txt +++ b/src/translation/cloud-translation/CMakeLists.txt @@ -5,6 +5,7 @@ target_sources( ${CMAKE_SOURCE_DIR}/src/translation/cloud-translation/azure.cpp ${CMAKE_SOURCE_DIR}/src/translation/cloud-translation/claude.cpp ${CMAKE_SOURCE_DIR}/src/translation/cloud-translation/curl-helper.cpp + ${CMAKE_SOURCE_DIR}/src/translation/cloud-translation/custom-api.cpp ${CMAKE_SOURCE_DIR}/src/translation/cloud-translation/deepl.cpp ${CMAKE_SOURCE_DIR}/src/translation/cloud-translation/google-cloud.cpp ${CMAKE_SOURCE_DIR}/src/translation/cloud-translation/openai.cpp
diff --git a/src/translation/cloud-translation/custom-api.cpp b/src/translation/cloud-translation/custom-api.cpp new file mode 100644 index 0000000..e5a9453 --- /dev/null +++ b/src/translation/cloud-translation/custom-api.cpp @@ -0,0 +1,140 @@ +#include "custom-api.h" +#include "curl-helper.h" +#include +#include +#include +#include + +using json = nlohmann::json; + +CustomApiTranslator::CustomApiTranslator(const std::string &endpoint, + const std::string &body_template, + const std::string &response_json_path) + : endpoint_(endpoint), + body_template_(body_template), + response_json_path_(response_json_path), + curl_helper_(std::make_unique<CurlHelper>()) +{ +} + +CustomApiTranslator::~CustomApiTranslator() = default; + +// POST the filled-in body template to the endpoint and return the translated text +std::string CustomApiTranslator::translate(const std::string &text, const std::string &target_lang, + const std::string &source_lang) +{ + // JSON-escape a value so it can sit between the quotes of a JSON string in the body + // template; invalid UTF-8 is replaced instead of making dump() throw + const auto escape = [](const std::string &value) { + const std::string quoted = + json(value).dump(-1, ' ', false, json::error_handler_t::replace); + // drop the surrounding '"' that dump() adds + return quoted.substr(1, quoted.size() - 2); + }; + // placeholder name, written as {{name}} in the body template, mapped to its value + const std::unordered_map<std::string, std::string> values = { + {"sentence", escape(text)}, + {"target_lang", escape(target_lang)}, + {"source_lang", escape(source_lang)}}; + + std::string body = replacePlaceholders(body_template_, values); + std::string response; + + std::unique_ptr<CURL, decltype(&curl_easy_cleanup)> curl(curl_easy_init(), + curl_easy_cleanup); + + if (!curl) { + throw std::runtime_error("Failed to initialize CURL session"); + } + + try { + // Set up curl options + curl_easy_setopt(curl.get(), CURLOPT_URL, endpoint_.c_str()); + curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, CurlHelper::WriteCallback); + curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response); + curl_easy_setopt(curl.get(), CURLOPT_SSL_VERIFYPEER, 1L); + curl_easy_setopt(curl.get(), CURLOPT_SSL_VERIFYHOST, 2L); + curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, 30L); + + // Set up POST request + curl_easy_setopt(curl.get(), CURLOPT_POST, 1L); + curl_easy_setopt(curl.get(), CURLOPT_POSTFIELDS, body.c_str()); + + // Set up headers + struct curl_slist *headers = nullptr; + headers = curl_slist_append(headers, "Content-Type: application/json"); + curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, headers); + + // Perform request + CURLcode res = curl_easy_perform(curl.get()); + + // Clean up headers + curl_slist_free_all(headers); + + if (res != CURLE_OK) { + throw TranslationError(std::string("CURL request failed: ") + + curl_easy_strerror(res)); + } + + return parseResponse(response); + + } catch (const TranslationError &) { + // already describes the failure (CURL or response error); do not relabel it + throw; + } catch (const std::exception &e) { + throw TranslationError(std::string("Custom API request failed: ") + e.what()); + } +} + +std::string CustomApiTranslator::replacePlaceholders( + const std::string &template_str, + const std::unordered_map<std::string, std::string> &values) const +{ + // Single left-to-right pass over the template: inserted values are never rescanned, so + // '$' or "{{...}}" inside a sentence is copied literally instead of being interpreted + std::string result; + size_t pos = 0; + while (pos < template_str.size()) { + const size_t open = template_str.find("{{", pos); + if (open == std::string::npos) + break; + const size_t close = template_str.find("}}", open + 2); + if (close == std::string::npos) + break; + const auto it = values.find(template_str.substr(open + 2, close - open - 2)); + if (it == values.end()) { + // not a known placeholder: keep the "{{" and resume scanning right after it + result.append(template_str, pos, open + 2 - pos); + pos = open + 2; + } else { + result.append(template_str, pos, open - pos); + result += it->second; + pos = close + 2; + } + } + // copy whatever follows the last placeholder + result.append(template_str, pos, std::string::npos); + return result; +} + +std::string CustomApiTranslator::parseResponse(const std::string &response_str) +{ + try { + // parse the JSON response + json response = json::parse(response_str); + + // a top-level key that literally equals the configured path is used as-is + const auto flat = response.find(response_json_path_); + if (flat != response.end()) + return flat->get<std::string>(); + + // otherwise read the path as dot-separated ("translations.0.text") and follow it + // as the JSON pointer "/translations/0/text" through nested objects and arrays + std::string pointer = response_json_path_.empty() ? "" : "/"; + for (const char c : response_json_path_) + pointer += (c == '.') ? '/' : c; + return response.at(json::json_pointer(pointer)).get<std::string>(); + } catch (const json::exception &e) { + throw TranslationError(std::string("JSON parsing error: ") + e.what()); + } +}
diff --git a/src/translation/cloud-translation/custom-api.h b/src/translation/cloud-translation/custom-api.h new file mode 100644 index 0000000..4e19653 --- /dev/null +++ b/src/translation/cloud-translation/custom-api.h @@ -0,0 +1,32 @@ +#pragma once +#include "ITranslator.h" +#include +#include +#include + +class CurlHelper; // Forward declaration + +// Translator for a user-configured HTTP endpoint: POSTs a JSON body built from a template +// and reads the translated text out of the JSON response +class CustomApiTranslator : public ITranslator { +public: + explicit CustomApiTranslator(const std::string &endpoint, const std::string &body_template, + const std::string &response_json_path); + ~CustomApiTranslator() override; + + std::string translate(const std::string &text, const std::string &target_lang, + const std::string &source_lang = "auto") override; + +private: + // Replace each {{name}} in template_str with values[name]; unknown names are left as-is + std::string + replacePlaceholders(const std::string &template_str, + const std::unordered_map<std::string, std::string> &values) const; + // Return the string found at response_json_path_ in the JSON response + std::string parseResponse(const std::string &response_str); + + std::string endpoint_; + std::string body_template_; + std::string response_json_path_; + std::unique_ptr<CurlHelper> curl_helper_; +}; diff --git a/src/translation/cloud-translation/translation-cloud.cpp b/src/translation/cloud-translation/translation-cloud.cpp index 6120698..76aeeb8 100644 --- a/src/translation/cloud-translation/translation-cloud.cpp +++ b/src/translation/cloud-translation/translation-cloud.cpp @@ -10,6
+10,7 @@ #include "papago.h" #include "claude.h" #include "openai.h" +#include "custom-api.h" #include "plugin-support.h" #include @@ -36,6 +37,9 @@ std::unique_ptr createTranslator(const CloudTranslatorConfig &confi return std::make_unique( config.access_key, config.model.empty() ? "gpt-4-turbo-preview" : config.model); + } else if (config.provider == "api") { + return std::make_unique(config.endpoint, config.body, + config.response_json_path); } throw TranslationError("Unknown translation provider: " + config.provider); } diff --git a/src/translation/cloud-translation/translation-cloud.h b/src/translation/cloud-translation/translation-cloud.h index f90de17..7c428bc 100644 --- a/src/translation/cloud-translation/translation-cloud.h +++ b/src/translation/cloud-translation/translation-cloud.h @@ -4,11 +4,14 @@ struct CloudTranslatorConfig { std::string provider; - std::string access_key; // Main API key/Client ID - std::string secret_key; // Secret key/Client secret - std::string region; // For AWS / Azure - std::string model; // For Claude - bool free; // For Deepl + std::string access_key; // Main API key/Client ID + std::string secret_key; // Secret key/Client secret + std::string region; // For AWS / Azure + std::string model; // For Claude + bool free = false; // For Deepl + std::string endpoint; // For Custom API + std::string body; // For Custom API + std::string response_json_path; // For Custom API }; std::string translate_cloud(const CloudTranslatorConfig &config, const std::string &text,