From 3a15e6bead642045ccb561654a172b953871ef27 Mon Sep 17 00:00:00 2001 From: tengo-lebanidze Date: Fri, 16 Jan 2026 20:17:25 +0400 Subject: [PATCH] Refactor translation batching --- .../Content/ContentTranslateCommand.cs | 22 ++++++++----------- .../Translation/AzureOpenAiTranslator.cs | 6 ++--- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/source/Cute/Commands/Content/ContentTranslateCommand.cs b/source/Cute/Commands/Content/ContentTranslateCommand.cs index 175c9a8..2392bda 100644 --- a/source/Cute/Commands/Content/ContentTranslateCommand.cs +++ b/source/Cute/Commands/Content/ContentTranslateCommand.cs @@ -22,11 +22,7 @@ namespace Cute.Commands.Content; public class ContentTranslateCommand(IConsoleWriter console, ILogger logger, AppSettings appSettings, TranslateFactory translateFactory, HttpClient httpClient) : BaseLoggedInCommand(console, logger, appSettings) -{ - private const int ENTRY_BATCH_SIZE = 20; - private const int TRANSLATION_BATCH_SIZE = 50; - private const int MAX_ENTRY_UPDATE_CONCURRENCY = 10; - +{ private readonly TranslateFactory _translateFactory = translateFactory; private readonly HttpClient _httpClient = httpClient; private readonly ConcurrentDictionary _translatorCache = new(); @@ -59,6 +55,10 @@ public class Settings : ContentCommandSettings [Description("Indicates how many concurrent calls can be made to a translation service for a single entry. Default is 10")] public int MaxConcurrency { get; set; } = 10; + [CommandOption("--entry-batch-size")] + [Description("Indicates how many concurrent calls can be made to a translation service for a single entry. Default is 10")] + public int EntryBatchSize { get; set; } = 10; + [CommandOption("--fallback-service")] [Description("Fallback translation service (Azure, Google, Deepl, GPT4o), in case configured one doesn't return a value. Will translate without a custom model and glossary")] public TranslationService? 
FallbackService { get; set; } = null; @@ -162,7 +162,6 @@ public override async Task ExecuteCommandAsync(CommandContext context, Sett { // Create semaphores to limit concurrent operations var throttler = new SemaphoreSlim(settings.MaxConcurrency); - var updateSemaphore = new SemaphoreSlim(MAX_ENTRY_UPDATE_CONCURRENCY); bool needToPublish = false; Dictionary> failedEntryIds = new Dictionary>(); @@ -204,7 +203,7 @@ await ProgressBars.Instance() entryBatch.Add(entry); - if (entryBatch.Count >= ENTRY_BATCH_SIZE) + if (entryBatch.Count >= settings.EntryBatchSize) { var (batchSymbols, batchNeedsPublish, batchFailures) = await ProcessEntryBatch( entryBatch, @@ -217,7 +216,6 @@ await ProgressBars.Instance() translationConfiguration, settings, throttler, - updateSemaphore, taskTranslate, glossary); @@ -250,7 +248,6 @@ await ProgressBars.Instance() translationConfiguration, settings, throttler, - updateSemaphore, taskTranslate, glossary); @@ -266,7 +263,7 @@ await ProgressBars.Instance() } // Flush any remaining pending updates - await FlushPendingUpdates(updateSemaphore); + await FlushPendingUpdates(throttler); taskTranslate.Description = $"{Emoji.Known.Robot} Translation completed ({symbols} symbols translated)"; taskTranslate.StopTask(); @@ -314,7 +311,6 @@ await PerformBulkOperations( Dictionary translationConfiguration, Settings settings, SemaphoreSlim throttler, - SemaphoreSlim updateSemaphore, ProgressTask taskTranslate, Dictionary>? 
glossary = null) { @@ -449,7 +445,7 @@ flatEntryTargetLocaleValue is null || if (entryChanged) { needsPublish = true; - updateTasks.Add(UpdateEntryAsync(entryId, originalEntry, flatEntry, serializer, fieldsToTranslate, targetLocaleCodes, updateSemaphore)); + updateTasks.Add(UpdateEntryAsync(entryId, originalEntry, flatEntry, serializer, fieldsToTranslate, targetLocaleCodes, throttler)); } } @@ -512,7 +508,7 @@ private async Task UpdateEntryAsync( } else { - translations = await translator.Translate(text, from, targetLanguages); + translations = await translator.Translate(text, from, targetLanguages, glossaries); } } catch (Exception ex) diff --git a/source/Cute/Services/Translation/AzureOpenAiTranslator.cs b/source/Cute/Services/Translation/AzureOpenAiTranslator.cs index 76b3b49..15c493c 100644 --- a/source/Cute/Services/Translation/AzureOpenAiTranslator.cs +++ b/source/Cute/Services/Translation/AzureOpenAiTranslator.cs @@ -102,11 +102,11 @@ public AzureOpenAiTranslator(IAzureOpenAiOptionsProvider azureOpenAiOptionsProvi var symbolCount = textToTranslate.Length; var toLanguageCodesArray = toLanguageCodes.ToArray(); var targetLanguagesStr = string.Join(", ", toLanguageCodesArray); - + // Check if we should translate one-by-one: using threshold model (GPT-4o) with multiple languages // When text >= symbolCountThreshold, we use GPT-4o which has limited output tokens, so translate one by one - var isUsingThresholdModel = symbolCountThreshold.HasValue && !string.IsNullOrEmpty(thresholdSetting) && textToTranslate.Length >= symbolCountThreshold; - var shouldTranslateOneByOne = isUsingThresholdModel && toLanguageCodesArray.Length > 1; + var isSubThresholdModel = symbolCountThreshold.HasValue && !string.IsNullOrEmpty(thresholdSetting) && textToTranslate.Length <= symbolCountThreshold; + var shouldTranslateOneByOne = !isSubThresholdModel && toLanguageCodesArray.Length > 1; if (shouldTranslateOneByOne) {