Skip to content

Commit

Permalink
Merge pull request #664 from joakimriedel/joakim/support-gpt4o
Browse files Browse the repository at this point in the history
Use the new tokenizer from Microsoft (Microsoft.ML.Tokenizers) to support the newer GPT-4o model.
  • Loading branch information
tom-englert authored Oct 6, 2024
2 parents 4a1251c + c631ef0 commit a890246
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 28 deletions.
2 changes: 1 addition & 1 deletion src/Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
<PackageVersion Include="ILMerge.Fody" Version="1.24.0" />
<PackageVersion Include="JetBrains.Annotations" Version="2024.2.0" />
<PackageVersion Include="Microsoft.CSharp" Version="4.7.0" />
<PackageVersion Include="Microsoft.DeepDev.TokenizerLib" Version="[1.3.3]" />
<PackageVersion Include="Microsoft.ML.Tokenizers" Version="0.22.0-preview.24378.1" />
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="17.11.1" />
<PackageVersion Include="Microsoft.SourceLink.GitHub" Version="8.0.0" />
<PackageVersion Include="Microsoft.VisualStudio.SDK" Version="[15.0.1]" />
Expand Down
34 changes: 17 additions & 17 deletions src/ResXManager.Translators/AzureOpenAITranslator.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
namespace ResXManager.Translators;

using global::Microsoft.DeepDev;
using global::Microsoft.ML.Tokenizers;
using Newtonsoft.Json;
using ResXManager.Infrastructure;
using System;
Expand Down Expand Up @@ -138,11 +138,11 @@ private sealed class ChatCompletionsResponse

private async Task TranslateUsingChatModel(ITranslationSession translationSession, HttpClient client)
{
const string ApiVersion = "2023-05-15";
const string ApiVersion = "2024-06-01";
var endpointUri = new Uri($"/openai/deployments/{ModelDeploymentName}/chat/completions?api-version={ApiVersion}", UriKind.Relative);
var tokenizer = await TokenizerBuilder.CreateByModelNameAsync(
var tokenizer = TiktokenTokenizer.CreateForModel(
ModelName ?? throw new InvalidOperationException("No model name provided in configuration!")
).ConfigureAwait(false);
);

var retries = 0;

Expand Down Expand Up @@ -200,7 +200,7 @@ private async Task TranslateUsingChatModel(ITranslationSession translationSessio
}

private IEnumerable<(ChatMessage message, ICollection<ITranslationItem> items)> PackChatModelMessagesIntoBatches(
ITranslationSession translationSession, IEnumerable<ITranslationItem> items, CultureInfo targetCulture, ITokenizer tokenizer
ITranslationSession translationSession, IEnumerable<ITranslationItem> items, CultureInfo targetCulture, TiktokenTokenizer tokenizer
)
{
var batchItems = new List<ITranslationItem>();
Expand All @@ -209,7 +209,7 @@ private async Task TranslateUsingChatModel(ITranslationSession translationSessio

foreach (var item in items)
{
var currentBatch = batchItems.Concat(new[] { item }).ToList();
var currentBatch = batchItems.Concat([item]).ToList();

var currentMessage = GenerateChatModelMessageForTranslations(translationSession, currentBatch, targetCulture);
if (currentMessage?.Content is null)
Expand All @@ -218,7 +218,7 @@ private async Task TranslateUsingChatModel(ITranslationSession translationSessio
continue;
}

var tokens = tokenizer.Encode(currentMessage.Content, new List<string>()).Count;
var tokens = tokenizer.CountTokens(currentMessage.Content);
if (tokens > PromptTokens)
{
translationSession.AddMessage($"Prompt for resource would exceed {PromptTokens} tokens: {item.Source.Substring(0, 20)}...");
Expand All @@ -235,7 +235,7 @@ private async Task TranslateUsingChatModel(ITranslationSession translationSessio
{
yield return (batchMessage, batchItems);

batchItems = new List<ITranslationItem>();
batchItems = [];
batchTokens = 0;
}

Expand Down Expand Up @@ -414,11 +414,11 @@ private sealed class CompletionsResponse

private async Task TranslateUsingCompletionsModel(ITranslationSession translationSession, HttpClient client)
{
const string ApiVersion = "2023-05-15";
const string ApiVersion = "2024-06-01";
var endpointUri = new Uri($"/openai/deployments/{ModelDeploymentName}/completions?api-version={ApiVersion}", UriKind.Relative);
var tokenizer = await TokenizerBuilder.CreateByModelNameAsync(
var tokenizer = TiktokenTokenizer.CreateForModel(
ModelName ?? throw new InvalidOperationException("No model name provided in configuration!")
).ConfigureAwait(false);
);

var retries = 0;

Expand Down Expand Up @@ -467,7 +467,7 @@ private async Task TranslateUsingCompletionsModel(ITranslationSession translatio
}
}

private IEnumerable<PromptList> PackCompletionModelPromptsIntoBatches(ITranslationSession translationSession, ITokenizer tokenizer)
private IEnumerable<PromptList> PackCompletionModelPromptsIntoBatches(ITranslationSession translationSession, TiktokenTokenizer tokenizer)
{
var batchItems = new PromptList();
var batchTokens = 0;
Expand All @@ -481,7 +481,7 @@ private IEnumerable<PromptList> PackCompletionModelPromptsIntoBatches(ITranslati
continue;
}

var tokens = tokenizer.Encode(prompt, new List<string>()).Count;
var tokens = tokenizer.CountTokens(prompt);

if (tokens > PromptTokens)
{
Expand All @@ -499,7 +499,7 @@ private IEnumerable<PromptList> PackCompletionModelPromptsIntoBatches(ITranslati
{
yield return batchItems;

batchItems = new PromptList();
batchItems = [];
batchTokens = 0;
}

Expand Down Expand Up @@ -634,12 +634,12 @@ public string? ModelName

private static IList<ICredentialItem> GetCredentials()
{
return new ICredentialItem[]
{
return
[
new CredentialItem("AuthenticationKey", "Key"),
new CredentialItem("Url", "Endpoint Url", false),
new CredentialItem("ModelDeploymentName", "Model Deployment Name", false),
new CredentialItem("ModelName", "Model Name", false),
};
];
}
}
2 changes: 1 addition & 1 deletion src/ResXManager.Translators/FodyWeavers.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
<Weavers xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="FodyWeavers.xsd">
<Throttle />
<PropertyChanged />
<ILMerge IncludeAssemblies="Microsoft.DeepDev.TokenizerLib" />
<ILMerge IncludeAssemblies="Microsoft.ML.Tokenizers" />
</Weavers>
16 changes: 8 additions & 8 deletions src/ResXManager.Translators/OpenAITranslator.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
namespace ResXManager.Translators;

using global::Microsoft.DeepDev;
using global::Microsoft.ML.Tokenizers;
using Newtonsoft.Json;
using ResXManager.Infrastructure;
using System;
Expand Down Expand Up @@ -145,9 +145,9 @@ private sealed class CompletionsResponse
private async Task TranslateUsingCompletionsModel(ITranslationSession translationSession, HttpClient client)
{
var endpointUri = new Uri($"/v1/chat/completions", UriKind.Relative);
var tokenizer = await TokenizerBuilder.CreateByModelNameAsync(
var tokenizer = TiktokenTokenizer.CreateForModel(
ModelName ?? throw new InvalidOperationException("No model name provided in configuration!")
).ConfigureAwait(false);
);

var retries = 0;

Expand Down Expand Up @@ -200,7 +200,7 @@ private async Task TranslateUsingCompletionsModel(ITranslationSession translatio
}
}

private IEnumerable<(ITranslationItem item, string prompt)> PackCompletionModelPrompts(ITranslationSession translationSession, ITokenizer tokenizer)
private IEnumerable<(ITranslationItem item, string prompt)> PackCompletionModelPrompts(ITranslationSession translationSession, TiktokenTokenizer tokenizer)
{
foreach (var item in translationSession.Items)
{
Expand All @@ -211,7 +211,7 @@ private async Task TranslateUsingCompletionsModel(ITranslationSession translatio
continue;
}

var tokens = tokenizer.Encode(prompt, new List<string>()).Count;
var tokens = tokenizer.CountTokens(prompt);

if (tokens > PromptTokens)
{
Expand Down Expand Up @@ -328,10 +328,10 @@ public string? ModelName

private static IList<ICredentialItem> GetCredentials()
{
return new ICredentialItem[]
{
return
[
new CredentialItem("AuthenticationKey", "Key"),
new CredentialItem("ModelName", "Model Name", false),
};
];
}
}
2 changes: 1 addition & 1 deletion src/ResXManager.Translators/ResXManager.Translators.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
<ItemGroup>
<PackageReference Include="Fody" PrivateAssets="all" />
<PackageReference Include="ILMerge.Fody" PrivateAssets="all" />
<PackageReference Include="Microsoft.DeepDev.TokenizerLib" />
<PackageReference Include="Microsoft.ML.Tokenizers" />
<PackageReference Include="PropertyChanged.Fody" PrivateAssets="all" />
<PackageReference Include="System.ComponentModel.Annotations" />
<PackageReference Include="System.Net.Http" />
Expand Down

0 comments on commit a890246

Please sign in to comment.