From 5e4148811970cf10d5e7091c4c391df5849e5b22 Mon Sep 17 00:00:00 2001 From: kercre123 Date: Fri, 12 Apr 2024 18:23:24 -0500 Subject: [PATCH] Add LLM command code --- chipper/pkg/vars/vars.go | 24 +++ chipper/pkg/wirepod/ttr/kgsim.go | 97 +++++----- chipper/pkg/wirepod/ttr/kgsim_cmds.go | 255 ++++++++++++++++++++++++++ chipper/pkg/wirepod/ttr/words2num.go | 123 +++++-------- chipper/webroot/initial.html | 4 +- chipper/webroot/setup.html | 4 +- 6 files changed, 377 insertions(+), 130 deletions(-) create mode 100644 chipper/pkg/wirepod/ttr/kgsim_cmds.go diff --git a/chipper/pkg/vars/vars.go b/chipper/pkg/vars/vars.go index 285dd1b8..8c1f438f 100644 --- a/chipper/pkg/vars/vars.go +++ b/chipper/pkg/vars/vars.go @@ -4,6 +4,7 @@ import ( "crypto/x509" "encoding/json" "encoding/pem" + "errors" "fmt" "os" "path/filepath" @@ -11,6 +12,7 @@ import ( "strconv" "strings" + "github.com/fforchino/vector-go-sdk/pkg/vector" "github.com/kercre123/wire-pod/chipper/pkg/logger" "github.com/sashabaranov/go-openai" ) @@ -417,3 +419,25 @@ func LoadChats() { } json.Unmarshal(file, &RememberedChats) } + +func GetRobot(esn string) (*vector.Vector, error) { + var guid string + var target string + matched := false + for _, bot := range BotInfo.Robots { + if esn == bot.Esn { + guid = bot.GUID + target = bot.IPAddress + ":443" + matched = true + break + } + } + if !matched { + return nil, errors.New("robot not in botsdkinfo") + } + robot, err := vector.New(vector.WithSerialNo(esn), vector.WithToken(guid), vector.WithTarget(target)) + if err != nil { + return nil, err + } + return robot, nil +} diff --git a/chipper/pkg/wirepod/ttr/kgsim.go b/chipper/pkg/wirepod/ttr/kgsim.go index 9ee8ad11..5da71c51 100644 --- a/chipper/pkg/wirepod/ttr/kgsim.go +++ b/chipper/pkg/wirepod/ttr/kgsim.go @@ -92,7 +92,7 @@ func StreamingKGSim(req interface{}, esn string, transcribedText string) (string } else { robName = "Vector" } - defaultPrompt := "You are a helpful robot called " + robName + ". 
The prompt may not be punctuated or spelled correctly as the STT model is small. The answer will be put through TTS, so it should be a speakable string. Keep the answer concise yet informative." + defaultPrompt := "You are a helpful, animated robot called " + robName + ". Keep the response concise yet informative." var nChat []openai.ChatCompletionMessage @@ -105,6 +105,10 @@ func StreamingKGSim(req interface{}, esn string, transcribedText string) (string smsg.Content = defaultPrompt } + smsg.Content = CreatePrompt(smsg.Content) + + logger.Println("Full prompt: " + smsg.Content) + nChat = append(nChat, smsg) if vars.APIConfig.Knowledge.SaveChat { rchat := GetChat(esn) @@ -122,7 +126,7 @@ func StreamingKGSim(req interface{}, esn string, transcribedText string) (string Stream: true, } if vars.APIConfig.Knowledge.Provider == "openai" { - aireq.Model = openai.GPT4Turbo1106 + aireq.Model = "gpt-4-turbo" logger.Println("Using " + aireq.Model) } else { logger.Println("Using " + vars.APIConfig.Knowledge.Model) @@ -239,19 +243,19 @@ func StreamingKGSim(req interface{}, esn string, transcribedText string) (string ctx, ) if err != nil { - log.Println(err) + logger.Println(err) return } if err := r.Send(controlRequest); err != nil { - log.Println(err) + logger.Println(err) return } for { ctrlresp, err := r.Recv() if err != nil { - log.Println(err) + logger.Println(err) return } if ctrlresp.GetControlGrantedResponse() != nil { @@ -282,8 +286,8 @@ func StreamingKGSim(req interface{}, esn string, transcribedText string) (string // * end - modified from official vector-go-sdk }() - var stopTTSLoop bool - TTSLoopStopped := make(chan bool) + //var stopTTSLoop bool + //TTSLoopStopped := make(chan bool) for range start { time.Sleep(time.Millisecond * 300) robot.Conn.PlayAnimation( @@ -295,29 +299,29 @@ func StreamingKGSim(req interface{}, esn string, transcribedText string) (string Loops: 1, }, ) - go func() { - for { - if stopTTSLoop { - TTSLoopStopped <- true - break - } - 
robot.Conn.PlayAnimation( - ctx, - &vectorpb.PlayAnimationRequest{ - Animation: &vectorpb.Animation{ - Name: "anim_tts_loop_02", - }, - Loops: 1, - }, - ) - } - }() + // go func() { + // for { + // if stopTTSLoop { + // TTSLoopStopped <- true + // break + // } + // robot.Conn.PlayAnimation( + // ctx, + // &vectorpb.PlayAnimationRequest{ + // Animation: &vectorpb.Animation{ + // Name: "anim_tts_loop_02", + // }, + // Loops: 1, + // }, + // ) + // } + // }() numInResp := 0 for { respSlice := fullRespSlice if len(respSlice)-1 < numInResp { if !isDone { - fmt.Println("waiting...") + logger.Println("Waiting for more content from LLM...") for range speakReady { respSlice = fullRespSlice break @@ -327,36 +331,25 @@ func StreamingKGSim(req interface{}, esn string, transcribedText string) (string } } logger.Println(respSlice[numInResp]) - _, err := robot.Conn.SayText( - ctx, - &vectorpb.SayTextRequest{ - Text: respSlice[numInResp], - UseVectorVoice: true, - DurationScalar: 1.0, - }, - ) - if err != nil { - logger.Println("KG SayText error: " + err.Error()) - stop <- true - break - } + acts := GetActionsFromString(respSlice[numInResp]) + PerformActions(acts, robot) numInResp = numInResp + 1 } - stopTTSLoop = true - for range TTSLoopStopped { - time.Sleep(time.Millisecond * 100) - robot.Conn.PlayAnimation( - ctx, - &vectorpb.PlayAnimationRequest{ - Animation: &vectorpb.Animation{ - Name: "anim_knowledgegraph_success_01", - }, - Loops: 1, + //stopTTSLoop = true + // for range TTSLoopStopped { + time.Sleep(time.Millisecond * 100) + robot.Conn.PlayAnimation( + ctx, + &vectorpb.PlayAnimationRequest{ + Animation: &vectorpb.Animation{ + Name: "anim_knowledgegraph_success_01", }, - ) - //time.Sleep(time.Millisecond * 3300) - stop <- true - } + Loops: 1, + }, + ) + //time.Sleep(time.Millisecond * 3300) + stop <- true + //} } return "", nil } diff --git a/chipper/pkg/wirepod/ttr/kgsim_cmds.go b/chipper/pkg/wirepod/ttr/kgsim_cmds.go new file mode 100644 index 00000000..c3e7d6d6 --- 
/dev/null
+++ b/chipper/pkg/wirepod/ttr/kgsim_cmds.go
@@ -0,0 +1,253 @@
+package wirepod_ttr
+
+import (
+	"context"
+	"strings"
+
+	"github.com/fforchino/vector-go-sdk/pkg/vector"
+	"github.com/fforchino/vector-go-sdk/pkg/vectorpb"
+	"github.com/kercre123/wire-pod/chipper/pkg/logger"
+)
+
+// Action codes stored in RobotAction.Action.
+const (
+	// ActionSayText speaks Parameter through the robot's TTS. It is not an
+	// LLM command; it is produced for the plain text around commands.
+	ActionSayText = 0
+	// ActionPlayAnimation plays the animation whose emotion name is Parameter.
+	ActionPlayAnimation = 1
+	// ActionPlayAnimationWI plays the animation whose emotion name is
+	// Parameter from its own goroutine, so that speech is not held up.
+	ActionPlayAnimationWI = 2
+	// ActionPlaySound names the sound to play in Parameter.
+	ActionPlaySound = 3
+
+	// actionInvalid is what CmdParamToAction returns for an unknown command.
+	actionInvalid = -1
+)
+
+// animationMap pairs each emotion name offered to the LLM with the name of
+// the animation which is played on the robot for it.
+var animationMap = [][2]string{
+	{"happy", "anim_onboarding_reacttoface_happy_01"},
+	{"veryHappy", "anim_onboarding_reacttoface_happy_01"},
+	{"sad", "anim_feedback_meanwords_01"},
+	{"verySad", "anim_feedback_meanwords_01"},
+	{"angry", "anim_keepaway_getout_frustrated_01"},
+	{"frustrated", "anim_keepaway_getout_frustrated_01"},
+	{"dartingEyes", "anim_observing_self_absorbed_01"},
+	{"confused", "anim_meetvictor_lookface_timeout_01"},
+	{"thinking", "anim_explorer_scan_short_04"},
+	{"celebrate", "anim_pounce_success_03"},
+}
+
+// soundMap pairs each sound name offered to the LLM with its sound file.
+var soundMap = [][2]string{
+	{"drumroll", "sounds/drumroll.wav"},
+}
+
+// RobotAction is one step of an LLM response: an Action* code and its
+// argument.
+type RobotAction struct {
+	Action    int
+	Parameter string
+}
+
+// LLMCommand describes a command which the LLM may embed in its response as
+// {{Command||parameter}}.
+type LLMCommand struct {
+	Command      string
+	Description  string
+	ParamChoices string
+	Action       int
+}
+
+// ValidLLMCommands lists every command which is advertised to the LLM by
+// CreatePrompt and accepted by CmdParamToAction.
+var ValidLLMCommands = []LLMCommand{
+	{
+		Command:      "playAnimation",
+		Description:  "Plays an animation on the robot. This will interrupt speech.",
+		ParamChoices: "happy, veryHappy, sad, verySad, angry, frustrated, dartingEyes, confused, thinking, celebrate",
+		Action:       ActionPlayAnimation,
+	},
+	{
+		Command:      "playAnimationWI",
+		Description:  "Plays an animation on the robot without interrupting speech.",
+		ParamChoices: "happy, veryHappy, sad, verySad, angry, frustrated, dartingEyes, confused, thinking, celebrate",
+		Action:       ActionPlayAnimationWI,
+	},
+	// {
+	// 	Command: "playSound",
+	// 	Description: "Plays a sound on the robot.",
+	// 	ParamChoices: "drumroll",
+	// 	Action: ActionPlaySound,
+	// },
+}
+
+// CreatePrompt returns origPrompt followed by the fixed response rules and a
+// description of every entry of ValidLLMCommands.
+func CreatePrompt(origPrompt string) string {
+	var prompt strings.Builder
+	prompt.WriteString(origPrompt)
+	prompt.WriteString("\n\n" + "IMPORTANT INSTRUCTIONS YOU MUST FOLLOW: The user input might not be spelt/punctuated correctly as it is coming from speech-to-text software. Do not include special characters in your answer. This includes the following characters (not including the quotes): '& ^ * # @ -'. If you want to use a hyphen, Use it like this: 'something something -- something -- something something'. DO NOT USE EMOJIS! Use the playAnimation or playAnimationWI commands if you want to express emotion! IF YOU DO NOT ABIDE BY THESE RULES, I WILL CANCEL YOUR RESPONSE AND WILL MAKE YOU START OVER. Also, don't start your answer with 'Well, well, well', it's getting old. You are very animated and good at following instructions. Animation takes precedence over words. You are to include many animations in your response. When you are told NOT to do something, you don't do it (it's so simple).")
+	prompt.WriteString("\n\n" + "ALSO IMPORTANT: You are running ON an Anki Vector robot. You have a set of commands. YOU ARE TO USE THESE. DO NOT BE AFRAID TO LITTER YOUR RESPONSE WITH THEM. Your response MUST include THREE OF THESE COMMANDS OR MORE. You are going to litter your response with them. If you include just one, I will make you start over. If you include an emoji, I will make you start over. If you want to use a command but it doesn't exist or your desired parameter isn't in the list, avoid using the command. The format is {{command||parameter}}. You can embed these in sentences. Example: \"User: How are you feeling? | Response: \"{{playAnimationWI||sad}} I'm feeling sad...\"\n\nHere is every valid command:")
+	for _, cmd := range ValidLLMCommands {
+		prompt.WriteString("\n\nCommand Name: " + cmd.Command + "\nDescription: " + cmd.Description + "\nParameter choices: " + cmd.ParamChoices)
+	}
+	return prompt.String()
+}
+
+// GetActionsFromString splits one piece of an LLM response into the actions
+// it describes, in order: text becomes ActionSayText and every
+// {{command||parameter}} becomes the action of that command. Unknown commands
+// are logged and dropped. An input without "{{" is returned as a single
+// ActionSayText with the input unchanged.
+func GetActionsFromString(input string) []RobotAction {
+	segments := strings.Split(input, "{{")
+	if len(segments) == 1 {
+		return []RobotAction{{Action: ActionSayText, Parameter: input}}
+	}
+	var actions []RobotAction
+	for _, segment := range segments {
+		if strings.TrimSpace(segment) == "" {
+			continue
+		}
+		// Only the text before the first "}}" is the command and only the
+		// text up to a second "}}" is spoken.
+		pieces := strings.Split(segment, "}}")
+		if len(pieces) == 1 {
+			// No command in this segment, it is all speech.
+			actions = append(actions, RobotAction{Action: ActionSayText, Parameter: strings.TrimSpace(segment)})
+			continue
+		}
+		// The LLM does not always keep to {{command||parameter}}: a command
+		// without "||" gets an empty parameter instead of an index panic.
+		cmdPlusParam := strings.Split(pieces[0], "||")
+		param := ""
+		if len(cmdPlusParam) > 1 {
+			param = strings.TrimSpace(cmdPlusParam[1])
+		}
+		action := CmdParamToAction(strings.TrimSpace(cmdPlusParam[0]), param)
+		if action.Action != actionInvalid {
+			actions = append(actions, action)
+		}
+		// Skip empty text so that no empty SayText request is sent.
+		if text := strings.TrimSpace(pieces[1]); text != "" {
+			actions = append(actions, RobotAction{Action: ActionSayText, Parameter: text})
+		}
+	}
+	return actions
+}
+
+// CmdParamToAction returns the action for cmd with param as its Parameter.
+// When cmd is not in ValidLLMCommands it logs that and returns an action
+// whose Action is -1.
+func CmdParamToAction(cmd, param string) RobotAction {
+	for _, command := range ValidLLMCommands {
+		if cmd == command.Command {
+			return RobotAction{Action: command.Action, Parameter: param}
+		}
+	}
+	logger.Println("LLM tried to do a command which doesn't exist: " + cmd + " (param: " + param + ")")
+	return RobotAction{Action: actionInvalid}
+}
+
+// animationFor returns the robot animation for the given emotion name.
+func animationFor(emotion string) (string, bool) {
+	for _, pair := range animationMap {
+		if pair[0] == emotion {
+			return pair[1], true
+		}
+	}
+	logger.Println("Animation provided by LLM doesn't exist: " + emotion)
+	return "", false
+}
+
+// playAnimation sends one PlayAnimation request for name to robot.
+func playAnimation(name string, robot *vector.Vector) error {
+	_, err := robot.Conn.PlayAnimation(
+		context.Background(),
+		&vectorpb.PlayAnimationRequest{
+			Animation: &vectorpb.Animation{
+				Name: name,
+			},
+			Loops: 1,
+		},
+	)
+	return err
+}
+
+// DoPlayAnimation plays the animation for the given emotion name and returns
+// the error of the request. An unknown name is logged and is not an error.
+func DoPlayAnimation(animation string, robot *vector.Vector) error {
+	name, ok := animationFor(animation)
+	if !ok {
+		return nil
+	}
+	return playAnimation(name, robot)
+}
+
+// DoPlayAnimationWI starts the animation for the given emotion name in a new
+// goroutine and returns at once. A failed request is logged; an unknown name
+// is logged and is not an error.
+func DoPlayAnimationWI(animation string, robot *vector.Vector) error {
+	name, ok := animationFor(animation)
+	if !ok {
+		return nil
+	}
+	go func() {
+		if err := playAnimation(name, robot); err != nil {
+			logger.Println("LLM animation error: " + err.Error())
+		}
+	}()
+	return nil
+}
+
+// DoPlaySound is a placeholder: it only logs whether sound is in soundMap.
+func DoPlaySound(sound string, robot *vector.Vector) error {
+	for _, soundThing := range soundMap {
+		if sound == soundThing[0] {
+			logger.Println("Would play sound")
+			return nil
+		}
+	}
+	logger.Println("Sound provided by LLM doesn't exist: " + sound)
+	return nil
+}
+
+// DoSayText speaks input in Vector's voice and returns the error of the
+// request.
+func DoSayText(input string, robot *vector.Vector) error {
+	_, err := robot.Conn.SayText(
+		context.Background(),
+		&vectorpb.SayTextRequest{
+			Text:           input,
+			UseVectorVoice: true,
+			DurationScalar: 0.95,
+		},
+	)
+	return err
+}
+
+// PerformActions runs actions on robot in order. It assumes that behavior
+// control is already held. A failing action is logged and the remaining
+// actions still run.
+func PerformActions(actions []RobotAction, robot *vector.Vector) {
+	for _, action := range actions {
+		var err error
+		switch action.Action {
+		case ActionSayText:
+			err = DoSayText(action.Parameter, robot)
+		case ActionPlayAnimation:
+			err = DoPlayAnimation(action.Parameter, robot)
+		case ActionPlayAnimationWI:
+			err = DoPlayAnimationWI(action.Parameter, robot)
+		case ActionPlaySound:
+			err = DoPlaySound(action.Parameter, robot)
+		}
+		if err != nil {
+			logger.Println("LLM action failed: " + err.Error())
+		}
+	}
+}
diff --git a/chipper/pkg/wirepod/ttr/words2num.go b/chipper/pkg/wirepod/ttr/words2num.go
index 64e1e211..cec1eed3 100755
--- a/chipper/pkg/wirepod/ttr/words2num.go
+++ 
b/chipper/pkg/wirepod/ttr/words2num.go
@@ -9,31 +9,6 @@ import (
 
 // This file contains words2num. It is given the spoken text and returns a string which contains the true number.
 
-var number int = 0
-
-func basicspeechText2num(speechText string) int {
-	if strings.Contains(speechText, "one") && !strings.Contains(speechText, "one hundred") {
-		return 1
-	} else if strings.Contains(speechText, "two") && !strings.Contains(speechText, "two hundred") {
-		return 2
-	} else if strings.Contains(speechText, "three") && !strings.Contains(speechText, "three hundred") {
-		return 3
-	} else if strings.Contains(speechText, "four") && !strings.Contains(speechText, "four hundred") {
-		return 4
-	} else if strings.Contains(speechText, "five") && !strings.Contains(speechText, "five hundred") {
-		return 5
-	} else if strings.Contains(speechText, "six ") && !strings.Contains(speechText, "six hundred") {
-		return 6
-	} else if strings.Contains(speechText, "seven ") && !strings.Contains(speechText, "seven hundred") {
-		return 7
-	} else if strings.Contains(speechText, "eight ") && !strings.Contains(speechText, "eight hundred") {
-		return 8
-	} else if strings.Contains(speechText, "nine ") && !strings.Contains(speechText, "nine hundred") {
-		return 9
-	}
-	return 0
-}
-
 func whisperSpeechtoNum(input string) string {
 	// whisper returns actual numbers in its response
 	// ex. "set a timer for 10 minutes and 11 seconds"
@@ -61,60 +36,108 @@ func whisperSpeechtoNum(input string) string {
 	return strconv.Itoa(totalSeconds)
 }
 
-func words2num(speechText string) string {
-	if os.Getenv("STT_SERVICE") == "whisper.cpp" {
-		return whisperSpeechtoNum(speechText)
-	}
-	number = basicspeechText2num(speechText)
-	if number == 0 {
-		number = 1
-	}
-	if strings.Contains(speechText, "teen") {
-		number = 10
-		if strings.Contains(speechText, "thir") {
-			number = 14
-		} else if strings.Contains(speechText, "four") {
-			number = 14
-		} else if strings.Contains(speechText, "fif") {
-			number = 15
-		} else if strings.Contains(speechText, "six") {
-			number = 16
-		} else if strings.Contains(speechText, "seven") {
-			number = 17
-		} else if strings.Contains(speechText, "eight") {
-			number = 18
-		} else if strings.Contains(speechText, "nine") {
-			number = 19
-		}
-	} else if strings.Contains(speechText, "ten") {
-		number = 10
-	} else if strings.Contains(speechText, "eleven") {
-		number = 11
-	} else if strings.Contains(speechText, "twelve") {
-		number = 12
-	}
-	if strings.Contains(speechText, "twenty") {
-		number = 20 + basicspeechText2num(speechText)
-	} else if strings.Contains(speechText, "thirty") {
-		number = 30 + basicspeechText2num(speechText)
-	} else if strings.Contains(speechText, "forty") {
-		number = 40 + basicspeechText2num(speechText)
-	} else if strings.Contains(speechText, "fifty") {
-		number = 50 + basicspeechText2num(speechText)
-	} else if strings.Contains(speechText, "sixty") {
-		number = 60 + basicspeechText2num(speechText)
-	} else if strings.Contains(speechText, "seventy") {
-		number = 70 + basicspeechText2num(speechText)
-	} else if strings.Contains(speechText, "eighty") {
-		number = 80 + basicspeechText2num(speechText)
-	} else if strings.Contains(speechText, "ninety") {
-		number = 90 + basicspeechText2num(speechText)
-	}
-	if strings.Contains(speechText, "hundred") {
-		number = number + 100
-	}
-	if strings.Contains(speechText, "minute") {
-		number = number * 60
-	}
-	return strconv.Itoa(number)
-}
+// textToNumber maps the number words which words2num understands to their
+// values. "hundred" is not in here, mapTextToNumber treats it as a multiplier.
+var textToNumber = map[string]int{
+	"zero": 0, "one": 1, "two": 2, "three": 3, "four": 4, "five": 5,
+	"six": 6, "seven": 7, "eight": 8, "nine": 9, "ten": 10,
+	"eleven": 11, "twelve": 12, "thirteen": 13, "fourteen": 14, "fifteen": 15,
+	"sixteen": 16, "seventeen": 17, "eighteen": 18, "nineteen": 19, "twenty": 20,
+	"thirty": 30, "forty": 40, "fifty": 50, "sixty": 60, "seventy": 70,
+	"eighty": 80, "ninety": 90,
+}
+
+// timeTokenPattern splits lowercased speech into runs of digits and words.
+// It is compiled once here instead of on every call of words2num.
+var timeTokenPattern = regexp.MustCompile(`\d+|[a-z]+`)
+
+// words2num returns, as a decimal string, the number of seconds of the
+// duration spoken in input, for example "ten minutes and eleven seconds" is
+// "611". It returns "0" when it finds no duration. When the STT_SERVICE
+// environment variable is "whisper.cpp" the work is left to
+// whisperSpeechtoNum.
+func words2num(input string) string {
+	if os.Getenv("STT_SERVICE") == "whisper.cpp" {
+		return whisperSpeechtoNum(input)
+	}
+	totalSeconds := 0
+	// number collects the number words seen since the last unit, so that
+	// "twenty five minutes" counts as 25 and not as 5.
+	var number []string
+	previous := ""
+	for _, token := range timeTokenPattern.FindAllString(strings.ToLower(input), -1) {
+		seconds := unitSeconds(token)
+		switch {
+		case seconds != 0:
+			value := mapTextToNumber(strings.Join(number, " "))
+			if len(number) == 0 && (previous == "a" || previous == "an") {
+				// "an hour" and "a minute" mean one of the unit.
+				value = 1
+			}
+			totalSeconds += value * seconds
+			number = number[:0]
+		case isNumberWord(token):
+			number = append(number, token)
+		case token == "and" && len(number) > 0:
+			// "one hundred and twenty seconds": keep collecting.
+		default:
+			number = number[:0]
+		}
+		previous = token
+	}
+	return strconv.Itoa(totalSeconds)
+}
+
+// unitSeconds returns the length in seconds of the time unit which token
+// starts with ("minute", "minutes", "hours", ...), or 0 if it is no unit.
+func unitSeconds(token string) int {
+	switch {
+	case strings.HasPrefix(token, "second"):
+		return 1
+	case strings.HasPrefix(token, "minute"):
+		return 60
+	case strings.HasPrefix(token, "hour"):
+		return 3600
+	}
+	return 0
+}
+
+// isNumberWord reports whether token is digits or a word which
+// mapTextToNumber understands.
+func isNumberWord(token string) bool {
+	if token == "hundred" {
+		return true
+	}
+	if _, ok := textToNumber[token]; ok {
+		return true
+	}
+	_, err := strconv.Atoi(token)
+	return err == nil
+}
+
+// mapTextToNumber returns the value of a spoken number whose words are
+// separated by hyphens or spaces, such as "five", "twenty-five" or
+// "one hundred twenty". Digits count with their value and words which are
+// not numbers count as 0.
+func mapTextToNumber(text string) int {
+	if val, ok := textToNumber[text]; ok {
+		return val
+	}
+	sum := 0
+	for _, part := range strings.FieldsFunc(text, func(r rune) bool { return r == '-' || r == ' ' }) {
+		if part == "hundred" {
+			// "hundred" alone is 100, "two hundred" is 200.
+			if sum == 0 {
+				sum = 1
+			}
+			sum *= 100
+			continue
+		}
+		if val, ok := textToNumber[part]; ok {
+			sum += val
+		} else if val, err := strconv.Atoi(part); err == nil {
+			sum += val
+		}
+	}
+	return sum
+}
diff --git a/chipper/webroot/initial.html b/chipper/webroot/initial.html
index 809abff0..e7d0f026 100644
--- a/chipper/webroot/initial.html
+++ b/chipper/webroot/initial.html
@@ -106,7 +106,7 @@

Knowledge Graph Setup



- +
Would you like to enable the intent graph feature? This forwards the request to Together if the regular intent processor didn't understand what you said.
@@ -124,7 +124,7 @@

Knowledge Graph Setup



- +
Would you like to enable the intent graph feature? This forwards the request to OpenAI if the regular intent processor didn't understand what you said.
diff --git a/chipper/webroot/setup.html b/chipper/webroot/setup.html index 03dbad2d..2ec80ab8 100644 --- a/chipper/webroot/setup.html +++ b/chipper/webroot/setup.html @@ -76,7 +76,7 @@

Knowledge Graph Setup



- +
Would you like to enable the intent graph feature? This forwards the request to Together if the regular intent processor didn't understand what you said.
@@ -93,7 +93,7 @@

Knowledge Graph Setup



- +
Would you like to enable the intent graph feature? This forwards the request to OpenAI if the regular intent processor didn't understand what you said.