Skip to content

Commit

Permalink
Merge pull request #63 from pavel-zhur/feature/audio
Browse files Browse the repository at this point in the history
dog & dragon: audio transcriptions
  • Loading branch information
pavel-zhur authored Jan 2, 2025
2 parents e057a6d + 6febf3f commit 0106d07
Show file tree
Hide file tree
Showing 15 changed files with 249 additions and 40 deletions.
2 changes: 2 additions & 0 deletions OneShelf.Billing/OneShelf.Billing.Api/Functions/All.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ protected override async Task<AllResponse> Execute(AllRequest allRequest)
"gpt-4-1106-preview" or "gpt-4-0125-preview" => .01f * x.InputTokens / 1000 + .03f * x.OutputTokens / 1000,
"gpt-4o" when x.CreatedOn < new DateTime(2024, 10, 2) => (.01f * x.InputTokens / 1000 + .03f * x.OutputTokens / 1000) * 0.5f,
"gpt-4o" => .01f * x.InputTokens / 1000 / 4 + .01f * x.OutputTokens / 1000,
"whisper-1" => x.Count / 60f * .006f,
_ => null,
},
Category = x.UseCase switch
Expand All @@ -62,6 +63,7 @@ protected override async Task<AllResponse> Execute(AllRequest allRequest)
"gpt-4-1106-preview" or "gpt-4-0125-preview" or "gpt-4o" => "chat text",
_ => "chat images",
},
"audio" => "audio transcription",
_ => x.Model switch
{
"gpt-4-1106-preview" or "gpt-4-0125-preview" or "gpt-4o" => "song text",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,6 @@ public enum InteractionType
ImagesSuccess,

OwnChatterImageMessage,

OwnChatterAudio,
}
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,11 @@ async Task IInteractionsRepository<InteractionType>.Add(List<IInteraction<Intera
await SaveChangesAsyncX();
}

/// <summary>
/// Saves the context's pending changes by awaiting <c>SaveChangesAsyncX</c>.
/// </summary>
/// <param name="interaction">
/// Not read by this method. NOTE(review): presumably the instance is already tracked by this
/// context, so its modifications are picked up by the save — confirm against callers.
/// </param>
async Task IInteractionsRepository<InteractionType>.Update(IInteraction<InteractionType> interaction)
    => await SaveChangesAsyncX();

InteractionType IInteractionsRepository<InteractionType>.OwnChatterMessage => InteractionType.OwnChatterMessage;

InteractionType IInteractionsRepository<InteractionType>.OwnChatterImageMessage => InteractionType.OwnChatterImageMessage;
Expand All @@ -235,5 +240,7 @@ async Task IInteractionsRepository<InteractionType>.Add(List<IInteraction<Intera

InteractionType IInteractionsRepository<InteractionType>.ImagesSuccess => InteractionType.ImagesSuccess;

InteractionType IInteractionsRepository<InteractionType>.Audio => InteractionType.OwnChatterAudio;

#endregion
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ public static IServiceCollection AddOpenAi(this IServiceCollection services, ICo

services
.AddScoped<DialogRunner>()
.AddScoped<Transcriber>()
.AddBillingApiClient(configuration)
.AddHttpClient();

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System.Text.Json;
using System.Text.Json.Nodes;
using System.Text.RegularExpressions;
using Azure.Core;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using OneShelf.Billing.Api.Client;
Expand Down
45 changes: 45 additions & 0 deletions OneShelf.Common/OneShelf.Common.OpenAi/Services/Transcriber.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using OneShelf.Billing.Api.Client;
using OneShelf.Common.OpenAi.Models;
using OpenAI;
using OpenAI.Audio;

namespace OneShelf.Common.OpenAi.Services;

/// <summary>
/// Transcribes audio through the OpenAI audio endpoint and reports the usage to the billing API.
/// </summary>
public class Transcriber
{
    /// <summary>Model name sent to the transcription endpoint and recorded in the billing entry.</summary>
    private const string Model = "whisper-1";

    /// <summary>File name attached to the uploaded audio stream.</summary>
    private const string UploadFileName = "stream.webm";

    private readonly ILogger<Transcriber> _logger;
    private readonly BillingApiClient _billingApiClient;
    private readonly OpenAIClient _client;

    /// <summary>
    /// Creates a transcriber whose OpenAI client is authenticated with <c>OpenAiApiKey</c> from the supplied options.
    /// </summary>
    /// <param name="options">OpenAI options; only the API key is read here.</param>
    /// <param name="logger">Logger used to report how long a transcription took.</param>
    /// <param name="billingApiClient">Client used to record the usage of each transcription.</param>
    public Transcriber(IOptions<OpenAiOptions> options, ILogger<Transcriber> logger, BillingApiClient billingApiClient)
    {
        _logger = logger;
        _billingApiClient = billingApiClient;
        _client = new(new(options.Value.OpenAiApiKey));
    }

    /// <summary>
    /// Sends the audio bytes to the transcription endpoint, records a billing entry, and returns the text.
    /// </summary>
    /// <param name="audio">Raw audio content; it is uploaded under the file name <c>stream.webm</c>.</param>
    /// <param name="configuration">Supplies the user, use case, domain, chat and additional info copied into the billing entry.</param>
    /// <returns>The transcribed text from the response.</returns>
    public async Task<string> TranscribeAudio(byte[] audio, DialogConfiguration configuration)
    {
        // A stopwatch is monotonic, and its ElapsedMilliseconds matches the "{ms} ms" log template
        // (previously a TimeSpan was logged under that placeholder).
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        using var stream = new MemoryStream(audio);
        var response = await _client.AudioEndpoint.CreateTranscriptionJsonAsync(
            new(stream, UploadFileName, Model, responseFormat: AudioResponseFormat.Verbose_Json));

        _logger.LogInformation("Audio transcribed. Took {ms} ms.", stopwatch.ElapsedMilliseconds);

        await _billingApiClient.Add(new()
        {
            // The reported duration is rounded up to a whole number; a missing duration is recorded as 0.
            Count = (int)Math.Ceiling(response.Duration ?? 0),
            UserId = configuration.UserId,
            Model = Model,
            UseCase = configuration.UseCase,
            AdditionalInfo = configuration.AdditionalBillingInfo,
            DomainId = configuration.DomainId,
            ChatId = configuration.ChatId,
        });

        return response.Text;
    }
}
7 changes: 7 additions & 0 deletions OneShelf.OneDog/OneShelf.OneDog.Database/DogDatabase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ async Task IInteractionsRepository<InteractionType>.Add(List<IInteraction<Intera
await SaveChangesAsync();
}

/// <summary>
/// Saves the context's pending changes by awaiting <c>SaveChangesAsync</c>.
/// </summary>
/// <param name="interaction">
/// Not read by this method. NOTE(review): presumably the instance is already tracked by this
/// context, so its modifications are picked up by the save — confirm against callers.
/// </param>
async Task IInteractionsRepository<InteractionType>.Update(IInteraction<InteractionType> interaction)
    => await SaveChangesAsync();

InteractionType IInteractionsRepository<InteractionType>.OwnChatterMessage => InteractionType.OwnChatterMessage;

InteractionType IInteractionsRepository<InteractionType>.OwnChatterImageMessage => InteractionType.OwnChatterImageMessage;
Expand All @@ -64,5 +69,7 @@ async Task IInteractionsRepository<InteractionType>.Add(List<IInteraction<Intera

InteractionType IInteractionsRepository<InteractionType>.ImagesSuccess => InteractionType.ImagesSuccess;

InteractionType IInteractionsRepository<InteractionType>.Audio => InteractionType.OwnChatterAudio;

#endregion
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,6 @@ public enum InteractionType
ImagesLimit,

OwnChatterImageMessage,

OwnChatterAudio
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@ public AiDialogHandler(ILogger<AiDialogHandler> logger,
DogDatabase dogDatabase,
DialogRunner dialogRunner,
IScopedAbstractions scopedAbstractions,
DogContext dogContext,
IHttpClientFactory httpClientFactory)
: base(scopedAbstractions, logger, dogDatabase, dialogRunner, httpClientFactory)
DogContext dogContext,
IHttpClientFactory httpClientFactory,
Transcriber transcriber)
: base(scopedAbstractions, logger, dogDatabase, dialogRunner, httpClientFactory, transcriber)
{
_dogDatabase = dogDatabase;
_dogContext = dogContext;
Expand All @@ -34,6 +35,11 @@ protected override void OnInitializing(long userId, long chatId)
_dogDatabase.InitializeInteractionsRepositoryScope(_dogContext.DomainId);
}

/// <summary>
/// Always returns <c>false</c>, regardless of the update.
/// NOTE(review): presumably this opts the handler out of audio transcription — confirm against the base class.
/// </summary>
/// <param name="update">The incoming update; not inspected.</param>
protected override bool TranscribeAudio(Update update) => false;

protected override bool CheckRelevant(Update update)
{
if (update.Message?.Chat.Id != _dogContext.Domain.ChatId) return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ async Task IInteractionsRepository<InteractionType>.Add(List<IInteraction<Intera
await SaveChangesAsync();
}

/// <summary>
/// Saves the context's pending changes by awaiting <c>SaveChangesAsync</c>.
/// </summary>
/// <param name="interaction">
/// Not read by this method. NOTE(review): presumably the instance is already tracked by this
/// context, so its modifications are picked up by the save — confirm against callers.
/// </param>
async Task IInteractionsRepository<InteractionType>.Update(IInteraction<InteractionType> interaction)
    => await SaveChangesAsync();

InteractionType IInteractionsRepository<InteractionType>.OwnChatterMessage => InteractionType.AiMessage;

InteractionType IInteractionsRepository<InteractionType>.OwnChatterImageMessage => InteractionType.AiImageMessage;
Expand All @@ -68,5 +73,7 @@ async Task IInteractionsRepository<InteractionType>.Add(List<IInteraction<Intera

InteractionType IInteractionsRepository<InteractionType>.ImagesSuccess => InteractionType.AiImagesSuccess;

InteractionType IInteractionsRepository<InteractionType>.Audio => InteractionType.AiAudio;

#endregion
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ public enum InteractionType
AiResetDialog,
AiImagesLimit,
AiImagesSuccess,
AiAudio,

DirectImagesSuccess,
DirectImagesLimit,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@ public AiDialogHandler(IScopedAbstractions scopedAbstractions,
DragonScope dragonScope,
Availability availability,
IOptions<TelegramOptions> options,
IHttpClientFactory httpClientFactory)
: base(scopedAbstractions, logger, dragonDatabase, dialogRunner, httpClientFactory)
IHttpClientFactory httpClientFactory,
Transcriber transcriber)
: base(scopedAbstractions, logger, dragonDatabase, dialogRunner, httpClientFactory, transcriber)
{
_dragonDatabase = dragonDatabase;
_dragonScope = dragonScope;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@ public interface IInteractionsRepository<TInteractionType>

Task Add(List<IInteraction<TInteractionType>> interactions);

/// <summary>
/// Persists changes made to an existing interaction.
/// NOTE(review): the implementations visible alongside this interface only save the context's
/// pending changes and do not read <paramref name="interaction"/> — confirm whether callers must
/// pass an instance that is already tracked.
/// </summary>
/// <param name="interaction">The interaction whose changes should be persisted.</param>
Task Update(IInteraction<TInteractionType> interaction);

TInteractionType OwnChatterMessage { get; }
TInteractionType OwnChatterImageMessage { get; }
TInteractionType OwnChatterMemoryPoint { get; }
TInteractionType OwnChatterResetDialog { get; }
TInteractionType ImagesLimit { get; }
TInteractionType ImagesSuccess { get; }
TInteractionType Audio { get; }
}
Loading

0 comments on commit 0106d07

Please sign in to comment.