Skip to content

Commit

Permalink
Add support for translating only full sentences in cloud translation
Browse files Browse the repository at this point in the history
  • Loading branch information
royshil committed Nov 21, 2024
1 parent 271affc commit ec353e1
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 57 deletions.
9 changes: 5 additions & 4 deletions data/locale/en-US.ini
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ file_output_group="File Output Configuration"
translate_explaination="Enabling translation will increase the processing load on your machine. This feature uses additional resources to translate content in real-time, which may impact performance. <a href='#'>Learn More</a>"
translate_cloud_explaination="Cloud translation requires an active internet connection and API keys to the translation provider."
translate_cloud_provider="Translation Provider"
translate_cloud_only_full_sentences="Translate only full sentences"
translate_cloud_api_key="Access Key"
translate_cloud_secret_key="Secret Key"
log_group="Logging"
Expand Down Expand Up @@ -106,8 +107,8 @@ Tencent-Translate="Tencent Translate"
Alibaba-Translate="Alibaba Translate"
Naver-Translate="Naver Translate"
Kakao-Translate="Kakao Translate"
Papago-Translate="Papago Translate"
Deepl-Translate="Deepl Translate"
Papago-Translate="Papago"
Deepl-Translate="Deepl"
Bing-Translate="Bing Translate"
OpenAI-Translate="OpenAI Translate"
Claude-Translate="Claude Translate"
OpenAI-Translate="OpenAI"
Claude-Translate="Claude"
100 changes: 62 additions & 38 deletions src/transcription-filter-callbacks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,39 +81,46 @@ std::string send_sentence_to_translation(const std::string &sentence,
return "";
}

std::string send_text_to_cloud_translation(const std::string &sentence,
struct transcription_filter_data *gf,
const std::string &source_language)
void send_sentence_to_cloud_translation_async(const std::string &sentence,
struct transcription_filter_data *gf,
const std::string &source_language,
std::function<void(const std::string &)> callback)
{
const std::string last_text = gf->last_text_for_translation;
gf->last_text_for_translation = sentence;
if (gf->translate_cloud && !sentence.empty()) {
obs_log(gf->log_level, "Translating text with cloud provider. %s -> %s",
source_language.c_str(), gf->target_lang.c_str());
std::string translated_text;
if (sentence == last_text) {
// do not translate the same sentence twice
return gf->last_text_translation;
}
CloudTranslatorConfig config;
config.provider = gf->translate_cloud_provider;
config.access_key = gf->translate_cloud_api_key;
config.secret_key = gf->translate_cloud_secret_key;

translated_text = translate_cloud(
config, sentence, gf->translate_cloud_target_language, source_language);
if (!translated_text.empty()) {
if (gf->log_words) {
obs_log(LOG_INFO, "Translation: '%s' -> '%s'", sentence.c_str(),
translated_text.c_str());
std::thread([sentence, gf, source_language, callback]() {
const std::string last_text = gf->last_text_for_cloud_translation;
gf->last_text_for_cloud_translation = sentence;
if (gf->translate_cloud && !sentence.empty()) {
obs_log(gf->log_level, "Translating text with cloud provider %s. %s -> %s",
gf->translate_cloud_provider.c_str(), source_language.c_str(),
gf->translate_cloud_target_language.c_str());
std::string translated_text;
if (sentence == last_text) {
// do not translate the same sentence twice
callback(gf->last_text_cloud_translation);
return;
}
CloudTranslatorConfig config;
config.provider = gf->translate_cloud_provider;
config.access_key = gf->translate_cloud_api_key;
config.secret_key = gf->translate_cloud_secret_key;

translated_text = translate_cloud(config, sentence,
gf->translate_cloud_target_language,
source_language);
if (!translated_text.empty()) {
if (gf->log_words) {
obs_log(LOG_INFO, "Cloud Translation: '%s' -> '%s'",
sentence.c_str(), translated_text.c_str());
}
gf->last_text_translation = translated_text;
callback(translated_text);
return;
} else {
obs_log(gf->log_level, "Failed to translate text");
}
gf->last_text_translation = translated_text;
return translated_text;
} else {
obs_log(gf->log_level, "Failed to translate text");
}
}
return "";
callback("");
}).detach();
}

void send_sentence_to_file(struct transcription_filter_data *gf,
Expand Down Expand Up @@ -271,33 +278,50 @@ void set_text_callback(struct transcription_filter_data *gf,
}
}

bool should_translate =
bool should_translate_local =
gf->translate_only_full_sentences ? result.result == DETECTION_RESULT_SPEECH : true;

// send the sentence to translation (if enabled)
std::string translated_sentence =
should_translate ? send_sentence_to_translation(str_copy, gf, result.language) : "";
std::string translated_sentence_local =
should_translate_local ? send_sentence_to_translation(str_copy, gf, result.language)
: "";

if (gf->translate) {
if (gf->translation_output == "none") {
// overwrite the original text with the translated text
str_copy = translated_sentence;
str_copy = translated_sentence_local;
} else {
if (gf->buffered_output) {
// buffered output - add the sentence to the monitor
gf->translation_monitor.addSentenceFromStdString(
translated_sentence,
translated_sentence_local,
get_time_point_from_ms(result.start_timestamp_ms),
get_time_point_from_ms(result.end_timestamp_ms),
result.result == DETECTION_RESULT_PARTIAL);
} else {
// non-buffered output - send the sentence to the selected source
send_caption_to_source(gf->translation_output, translated_sentence,
gf);
send_caption_to_source(gf->translation_output,
translated_sentence_local, gf);
}
}
}

bool should_translate_cloud = (gf->translate_cloud_only_full_sentences
? result.result == DETECTION_RESULT_SPEECH
: true) &&
gf->translate_cloud;

if (should_translate_cloud) {
send_sentence_to_cloud_translation_async(
str_copy, gf, result.language,
[gf](const std::string &translated_sentence_cloud) {
if (gf->translate_cloud_output != "none") {
send_caption_to_source(gf->translate_cloud_output,
translated_sentence_cloud, gf);
}
});
}

if (gf->buffered_output) {
gf->captions_monitor.addSentenceFromStdString(
str_copy, get_time_point_from_ms(result.start_timestamp_ms),
Expand All @@ -315,7 +339,7 @@ void set_text_callback(struct transcription_filter_data *gf,

if (gf->save_to_file && gf->output_file_path != "" &&
result.result == DETECTION_RESULT_SPEECH) {
send_sentence_to_file(gf, result, str_copy, translated_sentence);
send_sentence_to_file(gf, result, str_copy, translated_sentence_local);
}

if (!result.text.empty() && (result.result == DETECTION_RESULT_SPEECH ||
Expand Down
10 changes: 6 additions & 4 deletions src/transcription-filter-data.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,9 @@ struct transcription_filter_data {
std::string translate_cloud_output;
std::string translate_cloud_api_key;
std::string translate_cloud_secret_key;

// Last transcription result
std::string last_text_for_translation;
std::string last_text_translation;
bool translate_cloud_only_full_sentences = true;
std::string last_text_for_cloud_translation;
std::string last_text_cloud_translation;

// Transcription context sentences
int n_context_sentences;
Expand Down Expand Up @@ -127,6 +126,9 @@ struct transcription_filter_data {
std::string translation_model_index;
std::string translation_model_path_external;
bool translate_only_full_sentences;
// Last transcription result
std::string last_text_for_translation;
std::string last_text_translation;

bool buffered_output = false;
TokenBufferThread captions_monitor;
Expand Down
26 changes: 17 additions & 9 deletions src/transcription-filter-properties.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,10 @@ bool translation_cloud_options_callback(obs_properties_t *props, obs_property_t
UNUSED_PARAMETER(property);
// Show/Hide the cloud translation group options
const bool translate_enabled = obs_data_get_bool(settings, "translate_cloud");
for (const auto &prop : {"translate_cloud_provider", "translate_cloud_target_language",
"translate_cloud_output", "translate_cloud_api_key"}) {
for (const auto &prop :
{"translate_cloud_provider", "translate_cloud_target_language",
"translate_cloud_output", "translate_cloud_api_key",
"translate_cloud_only_full_sentences", "translate_cloud_secret_key"}) {
obs_property_set_visible(obs_properties_get(props, prop), translate_enabled);
}
return true;
Expand Down Expand Up @@ -209,9 +211,9 @@ void add_translation_cloud_group_properties(obs_properties_t *ppts)
MT_("translate_cloud_provider"), OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
// Populate the dropdown with the cloud translation service providers
obs_property_list_add_string(prop_translate_cloud_provider, MT_("Google-Cloud-Translation"),
"google-cloud-translation");
"google");
obs_property_list_add_string(prop_translate_cloud_provider, MT_("Microsoft-Translator"),
"microsoft-translator");
"azure");
// obs_property_list_add_string(prop_translate_cloud_provider, MT_("Amazon-Translate"),
// "amazon-translate");
// obs_property_list_add_string(prop_translate_cloud_provider, MT_("IBM-Watson-Translate"),
Expand All @@ -229,13 +231,13 @@ void add_translation_cloud_group_properties(obs_properties_t *ppts)
// obs_property_list_add_string(prop_translate_cloud_provider, MT_("Kakao-Translate"),
// "kakao-translate");
obs_property_list_add_string(prop_translate_cloud_provider, MT_("Papago-Translate"),
"papago-translate");
"papago");
obs_property_list_add_string(prop_translate_cloud_provider, MT_("Deepl-Translate"),
"deepl-translate");
"deepl");
obs_property_list_add_string(prop_translate_cloud_provider, MT_("OpenAI-Translate"),
"openai-translate");
"openai");
obs_property_list_add_string(prop_translate_cloud_provider, MT_("Claude-Translate"),
"claude-translate");
"claude");

// add target language selection
obs_property_t *prop_tgt = obs_properties_add_list(
Expand All @@ -253,6 +255,10 @@ void add_translation_cloud_group_properties(obs_properties_t *ppts)
obs_property_list_add_string(prop_output, "Write to captions output", "none");
obs_enum_sources(add_sources_to_list, prop_output);

// add boolean option for only full sentences
obs_properties_add_bool(translation_cloud_group, "translate_cloud_only_full_sentences",
MT_("translate_cloud_only_full_sentences"));

// add input for API Key
obs_properties_add_text(translation_cloud_group, "translate_cloud_api_key",
MT_("translate_cloud_api_key"), OBS_TEXT_DEFAULT);
Expand Down Expand Up @@ -695,10 +701,12 @@ void transcription_filter_defaults(obs_data_t *s)

// cloud translation options
obs_data_set_default_bool(s, "translate_cloud", false);
obs_data_set_default_string(s, "translate_cloud_provider", "google-cloud-translation");
obs_data_set_default_string(s, "translate_cloud_provider", "google");
obs_data_set_default_string(s, "translate_cloud_target_language", "en");
obs_data_set_default_string(s, "translate_cloud_output", "none");
obs_data_set_default_bool(s, "translate_cloud_only_full_sentences", true);
obs_data_set_default_string(s, "translate_cloud_api_key", "");
obs_data_set_default_string(s, "translate_cloud_secret_key", "");

// Whisper parameters
obs_data_set_default_int(s, "whisper_sampling_method", WHISPER_SAMPLING_BEAM_SEARCH);
Expand Down
2 changes: 2 additions & 0 deletions src/transcription-filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,8 @@ void transcription_filter_update(void *data, obs_data_t *s)
gf->translate_cloud_target_language =
obs_data_get_string(s, "translate_cloud_target_language");
gf->translate_cloud_output = obs_data_get_string(s, "translate_cloud_output");
gf->translate_cloud_only_full_sentences =
obs_data_get_bool(s, "translate_cloud_only_full_sentences");
gf->translate_cloud_api_key = obs_data_get_string(s, "translate_cloud_api_key");
gf->translate_cloud_secret_key = obs_data_get_string(s, "translate_cloud_secret_key");

Expand Down
7 changes: 6 additions & 1 deletion src/translation/cloud-translation/papago.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,13 @@ std::string PapagoTranslator::translate(const std::string &text, const std::stri
throw TranslationError("Text exceeds maximum length of 5000 characters");
}

std::string target_lang_valid = target_lang;
target_lang_valid.erase(std::remove(target_lang_valid.begin(), target_lang_valid.end(),
'_'),
target_lang_valid.end());

std::string papago_source = mapLanguageCode(source_lang);
std::string papago_target = mapLanguageCode(target_lang);
std::string papago_target = mapLanguageCode(target_lang_valid);

if (!isLanguagePairSupported(papago_source, papago_target)) {
throw TranslationError("Unsupported language pair: " + source_lang + " to " +
Expand Down
4 changes: 3 additions & 1 deletion src/translation/cloud-translation/translation-cloud.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,16 @@ std::unique_ptr<ITranslator> createTranslator(const CloudTranslatorConfig &confi
config.access_key,
config.model.empty() ? "gpt-4-turbo-preview" : config.model);
}
throw std::invalid_argument("Unknown translation provider: " + config.provider);
throw TranslationError("Unknown translation provider: " + config.provider);
}

std::string translate_cloud(const CloudTranslatorConfig &config, const std::string &text,
const std::string &target_lang, const std::string &source_lang)
{
try {
auto translator = createTranslator(config);
obs_log(LOG_INFO, "translate with cloud provider %s. %s -> %s",
config.provider.c_str(), source_lang.c_str(), target_lang.c_str());
std::string result = translator->translate(text, target_lang, source_lang);
return result;
} catch (const TranslationError &e) {
Expand Down

0 comments on commit ec353e1

Please sign in to comment.