Skip to content

Commit 9a8fe53

Browse files
authored
Merge pull request #217 from sj-distributor/enhance-ai-speech-session
Enhance ai speech assistant session
2 parents d5231f9 + 1d926a3 commit 9a8fe53

File tree

5 files changed

+24
-5
lines changed

5 files changed

+24
-5
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
alter table `ai_speech_assistant_function_call` add column `type` int not null default 0;

src/SmartTalk.Core/Domain/AISpeechAssistant/AiSpeechAssistantFunctionCall.cs

+4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System.ComponentModel.DataAnnotations;
22
using System.ComponentModel.DataAnnotations.Schema;
3+
using SmartTalk.Messages.Enums.AiSpeechAssistant;
34

45
namespace SmartTalk.Core.Domain.AISpeechAssistant;
56

@@ -20,6 +21,9 @@ public class AiSpeechAssistantFunctionCall : IEntity, IHasCreatedFields
2021
[Column("content")]
2122
public string Content { get; set; }
2223

24+
[Column("type")]
25+
public AiSpeechAssistantSessionConfigType Type { get; set; }
26+
2327
[Column("created_date")]
2428
public DateTimeOffset CreatedDate { get; set; }
2529
}

src/SmartTalk.Core/Services/AiSpeechAssistant/AiSpeechAssistantService.cs

+12-5
Original file line numberDiff line numberDiff line change
@@ -729,32 +729,39 @@ private async Task SendToWebSocketAsync(WebSocket socket, object message)
729729

730730
/// <summary>
/// Builds a "session.update" message for the given assistant and sends it over
/// <paramref name="openAiWebSocket"/> via SendToWebSocketAsync.
/// </summary>
/// <param name="openAiWebSocket">Socket the session update is written to.</param>
/// <param name="assistant">Assistant whose voice and stored session configs are applied.</param>
/// <param name="prompt">Text sent as the session's <c>instructions</c>.</param>
private async Task SendSessionUpdateAsync(WebSocket openAiWebSocket, Domain.AISpeechAssistant.AiSpeechAssistant assistant, string prompt)
731731
{
732-
var tools = await InitialSessionToolsAsync(assistant).ConfigureAwait(false);
732+
// All stored configs (tools and turn detection) are loaded once and split by type below.
var configs = await InitialSessionConfigAsync(assistant).ConfigureAwait(false);
733733

734734
var sessionUpdate = new
735735
{
736736
type = "session.update",
737737
session = new
738738
{
739-
turn_detection = new { type = "server_vad" },
739+
// InitialSessionTurnDirection returns the stored config, or { type = "server_vad" } when none exists.
turn_detection = InitialSessionTurnDirection(configs),
740740
input_audio_format = "g711_ulaw",
741741
output_audio_format = "g711_ulaw",
742742
// Falls back to the "alloy" voice when the assistant has no voice set.
voice = string.IsNullOrEmpty(assistant.Voice) ? "alloy" : assistant.Voice,
743743
instructions = prompt,
744744
modalities = new[] { "text", "audio" },
745745
temperature = 0.8,
746746
input_audio_transcription = new { model = "whisper-1" },
747-
tools = tools
747+
// Only entries typed as Tool are forwarded; their Config objects are passed through as-is.
tools = configs.Where(x => x.Type == AiSpeechAssistantSessionConfigType.Tool).Select(x => x.Config)
748748
}
749749
};
750750

751751
await SendToWebSocketAsync(openAiWebSocket, sessionUpdate);
752752
}
753753

754-
private async Task<IEnumerable<OpenAiRealtimeToolDto>> InitialSessionToolsAsync(Domain.AISpeechAssistant.AiSpeechAssistant assistant, CancellationToken cancellationToken = default)
754+
/// <summary>
/// Loads the function-call rows stored for <paramref name="assistant"/> and returns each row's
/// JSON <c>Content</c> deserialized to an untyped object, paired with the row's <c>Type</c>.
/// </summary>
/// <param name="assistant">Assistant whose <c>Id</c> is used to look up the rows.</param>
/// <param name="cancellationToken">Forwarded to the data provider query.</param>
/// <returns>
/// A list of (Type, Config) tuples; empty when there are no rows. Rows whose <c>Content</c>
/// is null or whitespace are skipped.
/// </returns>
private async Task<List<(AiSpeechAssistantSessionConfigType Type, object Config)>> InitialSessionConfigAsync(Domain.AISpeechAssistant.AiSpeechAssistant assistant, CancellationToken cancellationToken = default)
755755
{
756756
var functions = await _aiSpeechAssistantDataProvider.GetAiSpeechAssistantFunctionCallByAssistantIdAsync(assistant.Id, cancellationToken).ConfigureAwait(false);
757757

758-
return functions.Count == 0 ? [] : functions.Where(x => !string.IsNullOrWhiteSpace(x.Content)).Select(x => JsonConvert.DeserializeObject<OpenAiRealtimeToolDto>(x.Content));
758+
// Content is deserialized as plain object because rows of different types carry different JSON shapes.
// NOTE(review): DeserializeObject<object> yields Newtonsoft token types (e.g. JObject) for JSON objects —
// confirm the serializer used when sending the session update writes these back out as the original JSON.
return functions.Count == 0 ? [] : functions.Where(x => !string.IsNullOrWhiteSpace(x.Content)).Select(x => (x.Type, JsonConvert.DeserializeObject<object>(x.Content))).ToList();
759759
}
760+
761+
/// <summary>
/// Picks the value sent as the session's <c>turn_detection</c>: the Config of the first entry
/// typed <c>TurnDirection</c>, or <c>{ type = "server_vad" }</c> when there is none.
/// </summary>
/// <param name="configs">The (Type, Config) tuples loaded for the assistant.</param>
/// <returns>The stored config object, or the anonymous server_vad default.</returns>
private object InitialSessionTurnDirection(List<(AiSpeechAssistantSessionConfigType Type, object Config)> configs)
762+
{
763+
// The elements are value tuples, so FirstOrDefault never returns null: with no match it
// returns the default tuple, whose Config is null.
var turnDetection = configs.FirstOrDefault(x => x.Type == AiSpeechAssistantSessionConfigType.TurnDirection);
764+
765+
// A null Config (no match, or a match whose JSON deserialized to null) falls back to server VAD.
return turnDetection.Config ?? new { type = "server_vad" };
766+
}
760767
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
namespace SmartTalk.Messages.Enums.AiSpeechAssistant;
2+
3+
/// <summary>
/// Classifies a stored session config entry so it can be routed to the matching field of the
/// "session.update" payload.
/// </summary>
public enum AiSpeechAssistantSessionConfigType
4+
{
5+
// Value 0 (first member, no explicit value). Entries of this type are sent as the session's tools.
// The `type` column migration uses default 0, so existing rows map to Tool.
Tool,
6+
// Value 1. The first entry of this type is sent as the session's turn_detection.
// NOTE(review): the name presumably means "turn detection" — confirm before relying on it.
TurnDirection
7+
}

0 commit comments

Comments
 (0)