From e4eb9b675483c26e468c77869b8410e7ec18e8f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Ch=C5=82odek?= Date: Wed, 25 May 2022 14:28:27 +0200 Subject: [PATCH] Translation performance fix - batch translation The plugin invokes the translate endpoint once per segment, which is not optimal. The DeepL API allows translating up to 50 segments at once. Translating more segments at once reduces pre-translation time by up to 60% (based on my tests). In addition, to make the translation process more reliable, I've added a retry routine for certain HTTP status codes received from the DeepL service. --- .../Helpers/ExceptionExtensions.cs | 23 +++ .../Sdl.Community.DeepLMTProvider.csproj | 1 + ...LMtTranslationProviderLanguageDirection.cs | 30 ++-- .../DeepLTranslationProviderConnecter.cs | 153 ++++++++++++++++-- 4 files changed, 178 insertions(+), 29 deletions(-) create mode 100644 DeepLMTProvider/Sdl.Community.DeelLMTProvider/Helpers/ExceptionExtensions.cs diff --git a/DeepLMTProvider/Sdl.Community.DeelLMTProvider/Helpers/ExceptionExtensions.cs b/DeepLMTProvider/Sdl.Community.DeelLMTProvider/Helpers/ExceptionExtensions.cs new file mode 100644 index 0000000000..63e293cf78 --- /dev/null +++ b/DeepLMTProvider/Sdl.Community.DeelLMTProvider/Helpers/ExceptionExtensions.cs @@ -0,0 +1,23 @@ +using System.Net; +using System.Net.Http; + +namespace Sdl.Community.DeepLMTProvider.Helpers +{ + public static class ExceptionExtensions + { + private const string HttpStatusCodeDataKey = "StatusCode"; + + public static void SetHttpStatusCode(this HttpRequestException ex, HttpStatusCode statusCode) + { + ex.Data[HttpStatusCodeDataKey] = statusCode; + } + + public static HttpStatusCode? 
GetHttpStatusCode(this HttpRequestException ex) + { + if (!ex.Data.Contains(HttpStatusCodeDataKey)) + return null; + + return (HttpStatusCode)ex.Data[HttpStatusCodeDataKey]; + } + } +} diff --git a/DeepLMTProvider/Sdl.Community.DeelLMTProvider/Sdl.Community.DeepLMTProvider.csproj b/DeepLMTProvider/Sdl.Community.DeelLMTProvider/Sdl.Community.DeepLMTProvider.csproj index f1e57ba845..45f863cf49 100644 --- a/DeepLMTProvider/Sdl.Community.DeelLMTProvider/Sdl.Community.DeepLMTProvider.csproj +++ b/DeepLMTProvider/Sdl.Community.DeelLMTProvider/Sdl.Community.DeepLMTProvider.csproj @@ -126,6 +126,7 @@ + diff --git a/DeepLMTProvider/Sdl.Community.DeelLMTProvider/Studio/DeepLMtTranslationProviderLanguageDirection.cs b/DeepLMTProvider/Sdl.Community.DeelLMTProvider/Studio/DeepLMtTranslationProviderLanguageDirection.cs index 5666d260c7..0e1c9b0e66 100644 --- a/DeepLMTProvider/Sdl.Community.DeelLMTProvider/Studio/DeepLMtTranslationProviderLanguageDirection.cs +++ b/DeepLMTProvider/Sdl.Community.DeelLMTProvider/Studio/DeepLMtTranslationProviderLanguageDirection.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Globalization; +using System.Linq; using System.Threading.Tasks; using NLog; using Sdl.Community.DeepLMTProvider.Helpers; @@ -167,7 +168,7 @@ public SearchResults[] SearchTranslationUnitsMasked(SearchSettings settings, Tra if (preTranslateList.Count > 0) { //Create temp file with translations - var translatedSegments = PrepareTempData(preTranslateList).Result; + var translatedSegments = PrepareTempData(preTranslateList); var preTranslateSearchResults = GetPreTranslationSearchResults(translatedSegments); foreach (var result in preTranslateSearchResults) @@ -282,7 +283,7 @@ private string LookupDeepL(string sourceText) return _connecter.Translate(_languageDirection, sourceText); } - private async Task> PrepareTempData(List preTranslateSegments) + private List PrepareTempData(List preTranslateSegments) { try { @@ -307,16 +308,21 @@ private async 
Task> PrepareTempData(List Parallel.ForEach(preTranslateSegments, segment => - { - if (segment != null) - { - segment.PlainTranslation = _connecter.Translate(_languageDirection, segment.SourceText); - } - })).ConfigureAwait(true); - - return preTranslateSegments; - } + string[] segmentTranslation = _connecter.Translate( + _languageDirection, + preTranslateSegments.Where(ps => ps != null).Select(ps => ps.SourceText)); + + int translatedSegmentIndex = 0; + for (var i = 0; i < preTranslateSegments.Count; i++) + { + if (preTranslateSegments[i] != null) + { + preTranslateSegments[i].PlainTranslation = segmentTranslation[translatedSegmentIndex++]; + } + } + + return preTranslateSegments; + } catch (Exception e) { _logger.Error($"{e.Message}\n {e.StackTrace}"); diff --git a/DeepLMTProvider/Sdl.Community.DeelLMTProvider/Studio/DeepLTranslationProviderConnecter.cs b/DeepLMTProvider/Sdl.Community.DeelLMTProvider/Studio/DeepLTranslationProviderConnecter.cs index f4434a3dfa..8bdee118eb 100644 --- a/DeepLMTProvider/Sdl.Community.DeelLMTProvider/Studio/DeepLTranslationProviderConnecter.cs +++ b/DeepLMTProvider/Sdl.Community.DeelLMTProvider/Studio/DeepLTranslationProviderConnecter.cs @@ -20,6 +20,23 @@ public class DeepLTranslationProviderConnecter private static string _apiKey; private List _supportedSourceLanguages; + /// + /// Maximum count of translation retries + /// + private const int TranslateMaxRetryCount = 3; + /// + /// Too Many Requests status code as it does not exists in + /// + private const int HttpStatusCodeTooManyRequests = 429; + /// + /// The total count of translation retry attempts in a row. + /// + private ulong _totalTranslationRetryAttemptInARowCount = 0; + /// + /// The maximum count of translation retry attempts in a row after which we give up executing retry routine. 
+ /// + private const ulong MaxTranslationRetryAttemptInARowCount = TranslateMaxRetryCount * 5L; + public DeepLTranslationProviderConnecter(string key, Formality formality = Formality.Default) { ApiKey = key; @@ -91,38 +108,129 @@ public bool IsLanguagePairSupported(CultureInfo sourceCulture, CultureInfo targe } public string Translate(LanguagePair languageDirection, string sourceText) + { + return TranslateWithRetry(languageDirection, AsEnumerable(sourceText)).First(); + } + + private static IEnumerable<TObject> AsEnumerable<TObject>(TObject value) + { + yield return value; + } + + /// <summary> + /// Translates a batch of source texts. + /// </summary> + /// <param name="languageDirection">Language pair to translate between.</param> + /// <param name="sourceTexts">Source texts to translate.</param> + /// <returns>Translated source texts in the same order</returns> + public string[] Translate(LanguagePair languageDirection, IEnumerable<string> sourceTexts) + { + return TranslateWithRetry(languageDirection, sourceTexts); + } + + /// <summary> + /// Translates a batch, retrying when the request fails with an HTTP status that may be temporary + /// (request timeout, gateway timeout, bad gateway, service unavailable, too many requests). + /// </summary> + /// <param name="languageDirection">Language pair to translate between.</param> + /// <param name="sourceTexts">Source texts to translate.</param> + /// <param name="retryAttemptCount">Number of retries already made for this batch; the initial call uses 0.</param> + /// <returns>Translated texts as returned by the batch translation.</returns> + private string[] TranslateWithRetry(LanguagePair languageDirection, IEnumerable<string> sourceTexts, int retryAttemptCount = 0) + { + string[] translatedTexts = null; + try + { + translatedTexts = BatchTranslate(languageDirection, sourceTexts); + } + catch (HttpRequestException ex) + { + if (!ex.GetHttpStatusCode().HasValue || retryAttemptCount >= TranslateMaxRetryCount || DoesDeepLServiceLooksLikeIsUnavailable()) + throw; + + switch (ex.GetHttpStatusCode().Value) + { + case System.Net.HttpStatusCode.RequestTimeout: + case System.Net.HttpStatusCode.GatewayTimeout: + case System.Net.HttpStatusCode.BadGateway: + case System.Net.HttpStatusCode.ServiceUnavailable: + case (System.Net.HttpStatusCode)HttpStatusCodeTooManyRequests: + ++retryAttemptCount; + ++_totalTranslationRetryAttemptInARowCount; + TimeSpan retryDelay = CalculateRetryDelayTimeSpan(retryAttemptCount); + _logger.Info($"Retrying translation ({retryAttemptCount}) in {retryDelay} because of: {ex.Message} ({ex.GetHttpStatusCode().Value})."); + // Actually wait for the announced delay before retrying; the delay grows with each attempt. + System.Threading.Thread.Sleep(retryDelay); + 
translatedTexts = TranslateWithRetry(languageDirection, sourceTexts, retryAttemptCount); + break; + default: + throw; + } + } + + _totalTranslationRetryAttemptInARowCount = 0; + return translatedTexts; + } + + private TimeSpan CalculateRetryDelayTimeSpan(int retryAttemptCount) + { + return TimeSpan.FromSeconds(retryAttemptCount * 5); + } + + /// <summary> + /// Does the DeepL service look like it has been unavailable for a while? This prevents a continuous, time-consuming + /// retry routine when there is a serious problem on the DeepL side. If the service were unavailable for, say, a few hours, + /// the pre-translation task would otherwise run for a very long time. + /// </summary> + /// <returns>True when the count of retry attempts in a row exceeds the allowed maximum.</returns> + private bool DoesDeepLServiceLooksLikeIsUnavailable() + { + return _totalTranslationRetryAttemptInARowCount > MaxTranslationRetryAttemptInARowCount; + } + + private string[] BatchTranslate(LanguagePair languageDirection, IEnumerable<string> sourceTexts) { var formality = GetFormality(languageDirection); var targetLanguage = GetLanguage(languageDirection.TargetCulture, SupportedTargetLanguages); var sourceLanguage = GetLanguage(languageDirection.SourceCulture, SupportedSourceLanguages); - var translatedText = string.Empty; + + var sourceTextList = sourceTexts as IList<string> ?? sourceTexts.ToList(); + var translatedTexts = new string[sourceTextList.Count]; var normalizeHelper = new NormalizeSourceTextHelper(); + string stringContent = null; + HttpResponseMessage response = null; try { - sourceText = normalizeHelper.NormalizeText(sourceText); - - var content = new StringContent($"text={sourceText}" + - $"&source_lang={sourceLanguage}" + - $"&target_lang={targetLanguage}" + - $"&formality={formality.ToString().ToLower()}" + - "&preserve_formatting=1" + - "&tag_handling=xml" + - $"&auth_key={ApiKey}", - Encoding.UTF8, "application/x-www-form-urlencoded"); - - var response = AppInitializer.Client.PostAsync("https://api.deepl.com/v1/translate", content).Result; + stringContent = $"source_lang={sourceLanguage}" + + $"&target_lang={targetLanguage}" + + 
$"&formality={formality.ToString().ToLower()}" + + "&preserve_formatting=1" + + "&tag_handling=xml" + + $"&auth_key={ApiKey}"; + + foreach (string sourceText in sourceTextList) + { + string normalizedSourceText = normalizeHelper.NormalizeText(sourceText); + stringContent += $"&text={normalizedSourceText}"; + } + + var content = new StringContent(stringContent, Encoding.UTF8, "application/x-www-form-urlencoded"); + response = AppInitializer.Client.PostAsync("https://api.deepl.com/v1/translate", content).Result; response.EnsureSuccessStatusCode(); var translationResponse = response.Content?.ReadAsStringAsync().Result; var translatedObject = JsonConvert.DeserializeObject(translationResponse); - if (translatedObject != null && translatedObject.Translations.Any()) + if (translatedObject?.Translations != null) { - translatedText = translatedObject.Translations[0].Text; - translatedText = DecodeWhenNeeded(translatedText); + for (int i = 0; i < translatedObject.Translations.Count; i++) + { + translatedTexts[i] = translatedObject.Translations[i].Text; + translatedTexts[i] = DecodeWhenNeeded(translatedTexts[i]); + } } } catch (AggregateException aEx) { @@ -130,6 +238,17 @@ public string Translate(LanguagePair languageDirection, string sourceText) { _logger.Error(innerEx); } + throw; + } + catch (HttpRequestException ex) + { + if (response != null) + { + ex.SetHttpStatusCode(response.StatusCode); + } + + _logger.Error(ex); + throw; } catch (Exception ex) { @@ -137,7 +256,7 @@ public string Translate(LanguagePair languageDirection, string sourceText) throw; } - return translatedText; + return translatedTexts; } private static string GetSupportedLanguages(string type, string apiKey)