diff --git a/data/locale/en-US.ini b/data/locale/en-US.ini index 215f2df..6afc2de 100644 --- a/data/locale/en-US.ini +++ b/data/locale/en-US.ini @@ -79,6 +79,7 @@ file_output_group="File Output Configuration" translate_explaination="Enabling translation will increase the processing load on your machine, This feature uses additional resources to translate content in real-time, which may impact performance. Learn More" translate_cloud_explaination="Cloud translation requires an active internet connection and API keys to the translation provider." translate_cloud_provider="Translation Provider" +translate_cloud_only_full_sentences="Translate only full sentences" translate_cloud_api_key="Access Key" translate_cloud_secret_key="Secret Key" log_group="Logging" @@ -106,8 +107,8 @@ Tencent-Translate="Tencent Translate" Alibaba-Translate="Alibaba Translate" Naver-Translate="Naver Translate" Kakao-Translate="Kakao Translate" -Papago-Translate="Papago Translate" -Deepl-Translate="Deepl Translate" +Papago-Translate="Papago" +Deepl-Translate="Deepl" Bing-Translate="Bing Translate" -OpenAI-Translate="OpenAI Translate" -Claude-Translate="Claude Translate" +OpenAI-Translate="OpenAI" +Claude-Translate="Claude" diff --git a/src/transcription-filter-callbacks.cpp b/src/transcription-filter-callbacks.cpp index 2e7ec81..30b2267 100644 --- a/src/transcription-filter-callbacks.cpp +++ b/src/transcription-filter-callbacks.cpp @@ -81,39 +81,46 @@ std::string send_sentence_to_translation(const std::string &sentence, return ""; } -std::string send_text_to_cloud_translation(const std::string &sentence, - struct transcription_filter_data *gf, - const std::string &source_language) +void send_sentence_to_cloud_translation_async(const std::string &sentence, + struct transcription_filter_data *gf, + const std::string &source_language, + std::function callback) { - const std::string last_text = gf->last_text_for_translation; - gf->last_text_for_translation = sentence; - if (gf->translate_cloud && !sentence.empty()) { - obs_log(gf->log_level, "Translating text with cloud provider. %s -> %s", - source_language.c_str(), gf->target_lang.c_str()); - std::string translated_text; - if (sentence == last_text) { - // do not translate the same sentence twice - return gf->last_text_translation; - } - CloudTranslatorConfig config; - config.provider = gf->translate_cloud_provider; - config.access_key = gf->translate_cloud_api_key; - config.secret_key = gf->translate_cloud_secret_key; - - translated_text = translate_cloud( - config, sentence, gf->translate_cloud_target_language, source_language); - if (!translated_text.empty()) { - if (gf->log_words) { - obs_log(LOG_INFO, "Translation: '%s' -> '%s'", sentence.c_str(), - translated_text.c_str()); + std::thread([sentence, gf, source_language, callback]() { + const std::string last_text = gf->last_text_for_cloud_translation; + gf->last_text_for_cloud_translation = sentence; + if (gf->translate_cloud && !sentence.empty()) { + obs_log(gf->log_level, "Translating text with cloud provider %s. %s -> %s", + gf->translate_cloud_provider.c_str(), source_language.c_str(), + gf->translate_cloud_target_language.c_str()); + std::string translated_text; + if (sentence == last_text) { + // do not translate the same sentence twice + callback(gf->last_text_cloud_translation); + return; + } + CloudTranslatorConfig config; + config.provider = gf->translate_cloud_provider; + config.access_key = gf->translate_cloud_api_key; + config.secret_key = gf->translate_cloud_secret_key; + + translated_text = translate_cloud(config, sentence, + gf->translate_cloud_target_language, + source_language); + if (!translated_text.empty()) { + if (gf->log_words) { + obs_log(LOG_INFO, "Cloud Translation: '%s' -> '%s'", + sentence.c_str(), translated_text.c_str()); + } + gf->last_text_translation = translated_text; + callback(translated_text); + return; + } else { + obs_log(gf->log_level, "Failed to translate text"); } - gf->last_text_translation = translated_text; - return translated_text; - } else { - obs_log(gf->log_level, "Failed to translate text"); } - } - return ""; + callback(""); + }).detach(); } void send_sentence_to_file(struct transcription_filter_data *gf, @@ -271,33 +278,50 @@ void set_text_callback(struct transcription_filter_data *gf, } } - bool should_translate = + bool should_translate_local = gf->translate_only_full_sentences ? result.result == DETECTION_RESULT_SPEECH : true; // send the sentence to translation (if enabled) - std::string translated_sentence = - should_translate ? send_sentence_to_translation(str_copy, gf, result.language) : ""; + std::string translated_sentence_local = + should_translate_local ? send_sentence_to_translation(str_copy, gf, result.language) + : ""; if (gf->translate) { if (gf->translation_output == "none") { // overwrite the original text with the translated text - str_copy = translated_sentence; + str_copy = translated_sentence_local; } else { if (gf->buffered_output) { // buffered output - add the sentence to the monitor gf->translation_monitor.addSentenceFromStdString( - translated_sentence, + translated_sentence_local, get_time_point_from_ms(result.start_timestamp_ms), get_time_point_from_ms(result.end_timestamp_ms), result.result == DETECTION_RESULT_PARTIAL); } else { // non-buffered output - send the sentence to the selected source - send_caption_to_source(gf->translation_output, translated_sentence, - gf); + send_caption_to_source(gf->translation_output, + translated_sentence_local, gf); } } } + bool should_translate_cloud = (gf->translate_cloud_only_full_sentences + ? result.result == DETECTION_RESULT_SPEECH + : true) && + gf->translate_cloud; + + if (should_translate_cloud) { + send_sentence_to_cloud_translation_async( + str_copy, gf, result.language, + [gf](const std::string &translated_sentence_cloud) { + if (gf->translate_cloud_output != "none") { + send_caption_to_source(gf->translate_cloud_output, + translated_sentence_cloud, gf); + } + }); + } + if (gf->buffered_output) { gf->captions_monitor.addSentenceFromStdString( str_copy, get_time_point_from_ms(result.start_timestamp_ms), @@ -315,7 +339,7 @@ void set_text_callback(struct transcription_filter_data *gf, if (gf->save_to_file && gf->output_file_path != "" && result.result == DETECTION_RESULT_SPEECH) { - send_sentence_to_file(gf, result, str_copy, translated_sentence); + send_sentence_to_file(gf, result, str_copy, translated_sentence_local); } if (!result.text.empty() && (result.result == DETECTION_RESULT_SPEECH || diff --git a/src/transcription-filter-data.h b/src/transcription-filter-data.h index 1c71aaa..1d248b9 100644 --- a/src/transcription-filter-data.h +++ b/src/transcription-filter-data.h @@ -96,10 +96,9 @@ struct transcription_filter_data { std::string translate_cloud_output; std::string translate_cloud_api_key; std::string translate_cloud_secret_key; - - // Last transcription result - std::string last_text_for_translation; - std::string last_text_translation; + bool translate_cloud_only_full_sentences = true; + std::string last_text_for_cloud_translation; + std::string last_text_cloud_translation; // Transcription context sentences int n_context_sentences; @@ -127,6 +126,9 @@ struct transcription_filter_data { std::string translation_model_index; std::string translation_model_path_external; bool translate_only_full_sentences; + // Last transcription result + std::string last_text_for_translation; + std::string last_text_translation; bool buffered_output = false; TokenBufferThread captions_monitor; diff --git a/src/transcription-filter-properties.cpp b/src/transcription-filter-properties.cpp index f2b325f..77ad89e 100644 --- a/src/transcription-filter-properties.cpp +++ b/src/transcription-filter-properties.cpp @@ -49,8 +49,10 @@ bool translation_cloud_options_callback(obs_properties_t *props, obs_property_t UNUSED_PARAMETER(property); // Show/Hide the cloud translation group options const bool translate_enabled = obs_data_get_bool(settings, "translate_cloud"); - for (const auto &prop : {"translate_cloud_provider", "translate_cloud_target_language", - "translate_cloud_output", "translate_cloud_api_key"}) { + for (const auto &prop : + {"translate_cloud_provider", "translate_cloud_target_language", + "translate_cloud_output", "translate_cloud_api_key", + "translate_cloud_only_full_sentences", "translate_cloud_secret_key"}) { obs_property_set_visible(obs_properties_get(props, prop), translate_enabled); } return true; @@ -209,9 +211,9 @@ void add_translation_cloud_group_properties(obs_properties_t *ppts) MT_("translate_cloud_provider"), OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING); // Populate the dropdown with the cloud translation service providers obs_property_list_add_string(prop_translate_cloud_provider, MT_("Google-Cloud-Translation"), - "google-cloud-translation"); + "google"); obs_property_list_add_string(prop_translate_cloud_provider, MT_("Microsoft-Translator"), - "microsoft-translator"); + "azure"); // obs_property_list_add_string(prop_translate_cloud_provider, MT_("Amazon-Translate"), // "amazon-translate"); // obs_property_list_add_string(prop_translate_cloud_provider, MT_("IBM-Watson-Translate"), @@ -229,13 +231,13 @@ void add_translation_cloud_group_properties(obs_properties_t *ppts) // obs_property_list_add_string(prop_translate_cloud_provider, MT_("Kakao-Translate"), // "kakao-translate"); obs_property_list_add_string(prop_translate_cloud_provider, MT_("Papago-Translate"), - "papago-translate"); + "papago"); obs_property_list_add_string(prop_translate_cloud_provider, MT_("Deepl-Translate"), - "deepl-translate"); + "deepl"); obs_property_list_add_string(prop_translate_cloud_provider, MT_("OpenAI-Translate"), - "openai-translate"); + "openai"); obs_property_list_add_string(prop_translate_cloud_provider, MT_("Claude-Translate"), - "claude-translate"); + "claude"); // add target language selection obs_property_t *prop_tgt = obs_properties_add_list( @@ -253,6 +255,10 @@ void add_translation_cloud_group_properties(obs_properties_t *ppts) obs_property_list_add_string(prop_output, "Write to captions output", "none"); obs_enum_sources(add_sources_to_list, prop_output); + // add boolean option for only full sentences + obs_properties_add_bool(translation_cloud_group, "translate_cloud_only_full_sentences", + MT_("translate_cloud_only_full_sentences")); + // add input for API Key obs_properties_add_text(translation_cloud_group, "translate_cloud_api_key", MT_("translate_cloud_api_key"), OBS_TEXT_DEFAULT); @@ -695,10 +701,12 @@ void transcription_filter_defaults(obs_data_t *s) // cloud translation options obs_data_set_default_bool(s, "translate_cloud", false); - obs_data_set_default_string(s, "translate_cloud_provider", "google-cloud-translation"); + obs_data_set_default_string(s, "translate_cloud_provider", "google"); obs_data_set_default_string(s, "translate_cloud_target_language", "en"); obs_data_set_default_string(s, "translate_cloud_output", "none"); + obs_data_set_default_bool(s, "translate_cloud_only_full_sentences", true); obs_data_set_default_string(s, "translate_cloud_api_key", ""); + obs_data_set_default_string(s, "translate_cloud_secret_key", ""); // Whisper parameters obs_data_set_default_int(s, "whisper_sampling_method", WHISPER_SAMPLING_BEAM_SEARCH); diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 8c46637..bb36635 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -336,6 +336,8 @@ void transcription_filter_update(void *data, obs_data_t *s) gf->translate_cloud_target_language = obs_data_get_string(s, "translate_cloud_target_language"); gf->translate_cloud_output = obs_data_get_string(s, "translate_cloud_output"); + gf->translate_cloud_only_full_sentences = + obs_data_get_bool(s, "translate_cloud_only_full_sentences"); gf->translate_cloud_api_key = obs_data_get_string(s, "translate_cloud_api_key"); gf->translate_cloud_secret_key = obs_data_get_string(s, "translate_cloud_secret_key"); diff --git a/src/translation/cloud-translation/papago.cpp b/src/translation/cloud-translation/papago.cpp index 041fded..3a7ef14 100644 --- a/src/translation/cloud-translation/papago.cpp +++ b/src/translation/cloud-translation/papago.cpp @@ -136,8 +136,13 @@ std::string PapagoTranslator::translate(const std::string &text, const std::stri throw TranslationError("Text exceeds maximum length of 5000 characters"); } + std::string target_lang_valid = target_lang; + target_lang_valid.erase(std::remove(target_lang_valid.begin(), target_lang_valid.end(), + '_'), + target_lang_valid.end()); + std::string papago_source = mapLanguageCode(source_lang); - std::string papago_target = mapLanguageCode(target_lang); + std::string papago_target = mapLanguageCode(target_lang_valid); if (!isLanguagePairSupported(papago_source, papago_target)) { throw TranslationError("Unsupported language pair: " + source_lang + " to " + diff --git a/src/translation/cloud-translation/translation-cloud.cpp b/src/translation/cloud-translation/translation-cloud.cpp index d55b3b9..a1f4537 100644 --- a/src/translation/cloud-translation/translation-cloud.cpp +++ b/src/translation/cloud-translation/translation-cloud.cpp @@ -37,7 +37,7 @@ std::unique_ptr createTranslator(const CloudTranslatorConfig &confi config.access_key, config.model.empty() ? "gpt-4-turbo-preview" : config.model); } - throw std::invalid_argument("Unknown translation provider: " + config.provider); + throw TranslationError("Unknown translation provider: " + config.provider); } std::string translate_cloud(const CloudTranslatorConfig &config, const std::string &text, @@ -45,6 +45,8 @@ std::string translate_cloud(const CloudTranslatorConfig &config, const std::stri { try { auto translator = createTranslator(config); + obs_log(LOG_INFO, "translate with cloud provider %s. %s -> %s", + config.provider.c_str(), source_lang.c_str(), target_lang.c_str()); std::string result = translator->translate(text, target_lang, source_lang); return result; } catch (const TranslationError &e) {