From 5941f348bea2ca69ec63ba8feb8b763bac437a3b Mon Sep 17 00:00:00 2001 From: Jacob Stella Date: Sat, 5 Apr 2025 17:54:18 -0400 Subject: [PATCH 1/7] made some changes about stuff :) --- config/contentgen_yamls/3dgen.yaml | 10 +++++----- config/contentgen_yamls/gifgen.yaml | 12 ++++++------ config/contentgen_yamls/imagegen.yaml | 10 +++++----- config/contentgen_yamls/videogen.yaml | 12 ++++++------ config/general.yaml | 2 ++ config/speechtotext.yaml | 2 +- internal/listeners/vosk.go | 2 +- 7 files changed, 26 insertions(+), 24 deletions(-) diff --git a/config/contentgen_yamls/3dgen.yaml b/config/contentgen_yamls/3dgen.yaml index 34bb1d0..c626916 100644 --- a/config/contentgen_yamls/3dgen.yaml +++ b/config/contentgen_yamls/3dgen.yaml @@ -3,11 +3,11 @@ # a (possibly nonexistent) List of options and a (possibly nonexistent) default val. #For more details see https://platform.openai.com/docs/api-reference/images/create -endpoint: "https://api.meshy.ai/openapi/v2/text-to-3d" -method: "POST" -headers: - Authorization: "Bearer &THREED_MODEL_API_KEY" - Content-Type: "application/json" +# endpoint: "https://api.meshy.ai/openapi/v2/text-to-3d" +# method: "POST" +# headers: +# Authorization: "Bearer &THREED_MODEL_API_KEY" +# Content-Type: "application/json" requiredParameters: mode: default: "preview" diff --git a/config/contentgen_yamls/gifgen.yaml b/config/contentgen_yamls/gifgen.yaml index 9dc23c4..2d20f74 100644 --- a/config/contentgen_yamls/gifgen.yaml +++ b/config/contentgen_yamls/gifgen.yaml @@ -3,12 +3,12 @@ # a (possibly nonexistent) List of options and a (possibly nonexistent) default val. 
#For more details see https://docs.leonardo.ai/docs/generate-images-using-leonardo-phoenix-model#sample-output -endpoint: "https://cloud.leonardo.ai/api/rest/v1/generations" -method: "POST" -headers: - accept: "application/json" - authorization: "Bearer &GIF_API_KEY" - content-Type: "application/json" +# endpoint: "https://cloud.leonardo.ai/api/rest/v1/generations" +# method: "POST" +# headers: +# accept: "application/json" +# authorization: "Bearer &GIF_API_KEY" +# content-Type: "application/json" requiredParameters: prompt: description: "A text description of the desired gif(s)." diff --git a/config/contentgen_yamls/imagegen.yaml b/config/contentgen_yamls/imagegen.yaml index e36dca9..440f4fb 100644 --- a/config/contentgen_yamls/imagegen.yaml +++ b/config/contentgen_yamls/imagegen.yaml @@ -3,11 +3,11 @@ # a (possibly nonexistent) List of options and a (possibly nonexistent) default val. #For more details see https://platform.openai.com/docs/api-reference/images/create -endpoint: "https://api.openai.com/v1/images/generations" -method: "POST" -headers: - Authorization: "Bearer &IMAGE_API_KEY" - Content-Type: "application/json" +# endpoint: "https://api.openai.com/v1/images/generations" +# method: "POST" +# headers: +# Authorization: "Bearer &IMAGE_API_KEY" +# Content-Type: "application/json" requiredParameters: prompt: description: "A text description of the desired image(s)." 
diff --git a/config/contentgen_yamls/videogen.yaml b/config/contentgen_yamls/videogen.yaml index b6dc5ec..665847e 100644 --- a/config/contentgen_yamls/videogen.yaml +++ b/config/contentgen_yamls/videogen.yaml @@ -1,9 +1,9 @@ -endpoint: "https://api.lumalabs.ai/dream-machine/v1/generations" -method: "POST" -headers: - accept: "application/json" - authorization: "Bearer &VIDEO_API_KEY" - content-Type: "application/json" +# endpoint: "https://api.lumalabs.ai/dream-machine/v1/generations" +# method: "POST" +# headers: +# accept: "application/json" +# authorization: "Bearer &VIDEO_API_KEY" +# content-Type: "application/json" requiredParameters: prompt: description: "A text description of the desired video." diff --git a/config/general.yaml b/config/general.yaml index bebd1ea..b49374d 100644 --- a/config/general.yaml +++ b/config/general.yaml @@ -3,3 +3,5 @@ openWebsocket: false #log lvl order, from lowest to highest #Trace → Debug → Info → Warn → Error → Fatal → Panic log_level: "Trace" +yaml:"numIntentDetectionRetries: 1 + diff --git a/config/speechtotext.yaml b/config/speechtotext.yaml index 09f3581..414d70b 100644 --- a/config/speechtotext.yaml +++ b/config/speechtotext.yaml @@ -1,2 +1,2 @@ VoskURL: ws://localhost:2700 -keyword: "quantum computer" +keyword: "hey holo table" diff --git a/internal/listeners/vosk.go b/internal/listeners/vosk.go index fd3fe78..a502735 100644 --- a/internal/listeners/vosk.go +++ b/internal/listeners/vosk.go @@ -25,7 +25,7 @@ var vosk *websocket.Conn func SendAudio(audio []byte) { err := vosk.WriteMessage(websocket.BinaryMessage, audio) if err != nil { - log.Print("Failed to send audio to vosk, ", err) + //log.Print("Failed to send audio to vosk, ", err) } } From 8677b0c2f46d7abf2649cb4f7a6789c64a6d0a91 Mon Sep 17 00:00:00 2001 From: Jacob Stella Date: Sat, 5 Apr 2025 18:05:08 -0400 Subject: [PATCH 2/7] removed un-necessary paramaters from yamls and fixed general.yaml typo --- config/contentgen_yamls/3dgen.yaml | 5 ----- 
config/contentgen_yamls/gifgen.yaml | 6 ------ config/contentgen_yamls/imagegen.yaml | 5 ----- config/contentgen_yamls/videogen.yaml | 6 +----- config/general.yaml | 2 +- 5 files changed, 2 insertions(+), 22 deletions(-) diff --git a/config/contentgen_yamls/3dgen.yaml b/config/contentgen_yamls/3dgen.yaml index c626916..0f851cd 100644 --- a/config/contentgen_yamls/3dgen.yaml +++ b/config/contentgen_yamls/3dgen.yaml @@ -3,11 +3,6 @@ # a (possibly nonexistent) List of options and a (possibly nonexistent) default val. #For more details see https://platform.openai.com/docs/api-reference/images/create -# endpoint: "https://api.meshy.ai/openapi/v2/text-to-3d" -# method: "POST" -# headers: -# Authorization: "Bearer &THREED_MODEL_API_KEY" -# Content-Type: "application/json" requiredParameters: mode: default: "preview" diff --git a/config/contentgen_yamls/gifgen.yaml b/config/contentgen_yamls/gifgen.yaml index 2d20f74..229111f 100644 --- a/config/contentgen_yamls/gifgen.yaml +++ b/config/contentgen_yamls/gifgen.yaml @@ -3,12 +3,6 @@ # a (possibly nonexistent) List of options and a (possibly nonexistent) default val. #For more details see https://docs.leonardo.ai/docs/generate-images-using-leonardo-phoenix-model#sample-output -# endpoint: "https://cloud.leonardo.ai/api/rest/v1/generations" -# method: "POST" -# headers: -# accept: "application/json" -# authorization: "Bearer &GIF_API_KEY" -# content-Type: "application/json" requiredParameters: prompt: description: "A text description of the desired gif(s)." diff --git a/config/contentgen_yamls/imagegen.yaml b/config/contentgen_yamls/imagegen.yaml index 440f4fb..a558e3d 100644 --- a/config/contentgen_yamls/imagegen.yaml +++ b/config/contentgen_yamls/imagegen.yaml @@ -3,11 +3,6 @@ # a (possibly nonexistent) List of options and a (possibly nonexistent) default val. 
#For more details see https://platform.openai.com/docs/api-reference/images/create -# endpoint: "https://api.openai.com/v1/images/generations" -# method: "POST" -# headers: -# Authorization: "Bearer &IMAGE_API_KEY" -# Content-Type: "application/json" requiredParameters: prompt: description: "A text description of the desired image(s)." diff --git a/config/contentgen_yamls/videogen.yaml b/config/contentgen_yamls/videogen.yaml index 665847e..ffd2da0 100644 --- a/config/contentgen_yamls/videogen.yaml +++ b/config/contentgen_yamls/videogen.yaml @@ -1,9 +1,5 @@ # endpoint: "https://api.lumalabs.ai/dream-machine/v1/generations" -# method: "POST" -# headers: -# accept: "application/json" -# authorization: "Bearer &VIDEO_API_KEY" -# content-Type: "application/json" + requiredParameters: prompt: description: "A text description of the desired video." diff --git a/config/general.yaml b/config/general.yaml index b49374d..e68b2ca 100644 --- a/config/general.yaml +++ b/config/general.yaml @@ -3,5 +3,5 @@ openWebsocket: false #log lvl order, from lowest to highest #Trace → Debug → Info → Warn → Error → Fatal → Panic log_level: "Trace" -yaml:"numIntentDetectionRetries: 1 +numIntentDetectionRetries: 1 From dc66e653bb68240d9904f5dbe7b0313d4863faa4 Mon Sep 17 00:00:00 2001 From: Jacob Stella Date: Sat, 5 Apr 2025 18:17:00 -0400 Subject: [PATCH 3/7] fix print comment --- internal/listeners/vosk.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/listeners/vosk.go b/internal/listeners/vosk.go index a502735..4c02897 100644 --- a/internal/listeners/vosk.go +++ b/internal/listeners/vosk.go @@ -26,6 +26,8 @@ func SendAudio(audio []byte) { err := vosk.WriteMessage(websocket.BinaryMessage, audio) if err != nil { //log.Print("Failed to send audio to vosk, ", err) + //This error keep popping up when we temporatrily shutdown the mic and floods the logs so we are ignoring it for now + _ = err } } From b195a76f165b06879a82b1d5933813498f6b486c Mon Sep 17 00:00:00 2001 From: Jacob 
Stella Date: Sun, 6 Apr 2025 12:52:09 -0400 Subject: [PATCH 4/7] added folder to store additrional models, updated env to use the name of the models --- .env.example | 11 +-- .../gif/leonardo/gifWorkflow.yaml | 51 ++++++++++++++ .../gif/leonardo/gifgen.yaml | 27 ++++++++ .../image/dalle/imageWorkflow.yaml | 17 +++++ .../image/dalle/imagegen.yaml | 34 ++++++++++ .../image/deepai/imageWorkflow.yaml | 18 +++++ .../image/deepai/imagegen.yaml | 4 ++ .../image/leonardo/imageWorkflow.yaml | 28 ++++++++ .../image/leonardo/imagegen.yaml | 27 ++++++++ .../intent_detection/gpt/intentdetection.yaml | 67 +++++++++++++++++++ .../model/meshy/3dWorkflow.yaml | 57 ++++++++++++++++ .../additional_models/model/meshy/3dgen.yaml | 25 +++++++ .../video/luma/videoWorkflow.yaml | 28 ++++++++ .../video/luma/videogen.yaml | 51 ++++++++++++++ config/contentgen_workflows/3dWorkflow.yaml | 10 +-- config/contentgen_workflows/gifWorkflow.yaml | 8 +-- .../contentgen_workflows/imageWorkflow.yaml | 2 +- .../contentgen_workflows/videoWorkflow.yaml | 4 +- config/intentdetection.yaml | 2 +- 19 files changed, 453 insertions(+), 18 deletions(-) create mode 100644 config/additional_models/gif/leonardo/gifWorkflow.yaml create mode 100644 config/additional_models/gif/leonardo/gifgen.yaml create mode 100644 config/additional_models/image/dalle/imageWorkflow.yaml create mode 100644 config/additional_models/image/dalle/imagegen.yaml create mode 100644 config/additional_models/image/deepai/imageWorkflow.yaml create mode 100644 config/additional_models/image/deepai/imagegen.yaml create mode 100644 config/additional_models/image/leonardo/imageWorkflow.yaml create mode 100644 config/additional_models/image/leonardo/imagegen.yaml create mode 100644 config/additional_models/intent_detection/gpt/intentdetection.yaml create mode 100644 config/additional_models/model/meshy/3dWorkflow.yaml create mode 100644 config/additional_models/model/meshy/3dgen.yaml create mode 100644 
config/additional_models/video/luma/videoWorkflow.yaml create mode 100644 config/additional_models/video/luma/videogen.yaml diff --git a/.env.example b/.env.example index 76d6c16..669ebf8 100644 --- a/.env.example +++ b/.env.example @@ -1,6 +1,7 @@ # API Keys for AI models -INTENT_DETECTION_API_KEY: FILL_ME_IN -VIDEO_API_KEY: FILL_ME_IN -IMAGE_API_KEY: FILL_ME_IN -THREED_MODEL_API_KEY: FILL_ME_IN -GIF_API_KEY: FILL_ME_IN +# As long as the api key name matches what it is in the workflow, you can name it whatever you want +OPENAI_API_KEY: FILL_ME_IN +LUMA_API_KEY: FILL_ME_IN +MESHY_API_KEY: FILL_ME_IN +LEONARDO_API_KEY: FILL_ME_IN +DEEPAI_API_KEY: FILL_ME_IN diff --git a/config/additional_models/gif/leonardo/gifWorkflow.yaml b/config/additional_models/gif/leonardo/gifWorkflow.yaml new file mode 100644 index 0000000..bd9e00f --- /dev/null +++ b/config/additional_models/gif/leonardo/gifWorkflow.yaml @@ -0,0 +1,51 @@ +gif: + steps: + - name: "submit_image_generation" + method: "POST" + url: "https://cloud.leonardo.ai/api/rest/v1/generations" + headers: + Authorization: "Bearer &LEONARDO_API_KEY" + Content-Type: "application/json" + body: + intent_detection_step: true + response_placeholders: + generation_id: sdGenerationJob.generationId + + - name: "poll_image_generation_status" + method: "GET" + url: "https://cloud.leonardo.ai/api/rest/v1/generations/{generation_id}" + headers: + Authorization: "Bearer &LEONARDO_API_KEY" + response_placeholders: + status: generations_by_pk.status + image_id: generations_by_pk.generated_images.0.id + poll: + until: "COMPLETE" + interval: 10 + + - name: "submit_motion_generation" + method: "POST" + url: "https://cloud.leonardo.ai/api/rest/v1/generations-motion-svd" + headers: + Authorization: "Bearer &LEONARDO_API_KEY" + Content-Type: "application/json" + body: + imageId: "{image_id}" + response_placeholders: + motion_generation_id: motionSvdGenerationJob.generationId + + - name: "poll_motion_generation_status" + method: "GET" + url: 
"https://cloud.leonardo.ai/api/rest/v1/generations/{motion_generation_id}" + headers: + Authorization: "Bearer &LEONARDO_API_KEY" + response_placeholders: + status: generations_by_pk.status + poll: + until: "COMPLETE" + interval: 10 + content_extraction: + response_path: "generations_by_pk.generated_images.0.motionMP4URL" #should be this once 0 index gets fixed generations_by_pk.generated_images.0.motionMP4URL + response_format: "url" + file_extention: "mp4" + \ No newline at end of file diff --git a/config/additional_models/gif/leonardo/gifgen.yaml b/config/additional_models/gif/leonardo/gifgen.yaml new file mode 100644 index 0000000..229111f --- /dev/null +++ b/config/additional_models/gif/leonardo/gifgen.yaml @@ -0,0 +1,27 @@ +#Paramaters for Leonardos Image Gen API, we need to make an image to then make a gif +#The APIConfig struct assumes that each required and optional paramater has a description +# a (possibly nonexistent) List of options and a (possibly nonexistent) default val. +#For more details see https://docs.leonardo.ai/docs/generate-images-using-leonardo-phoenix-model#sample-output + +requiredParameters: + prompt: + description: "A text description of the desired gif(s)." + options: [] + modelId: + default: "de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3" + options: ["de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3"] + description: "The model to use for image generation, currently Leonardo Phoenix 1.0" + contrast: + default: 2.5 + description: "The contrast of the generated gif, If alchemy is true, contrast needs to be 2.5 or higher." + options: [1.0, 1.3, 1.8, 2.5, 3, 3.5, 4, 4.5] +optionalParameters: + alchemy: + default: false + description: "If true, the output will be generated with an alchemy effect." + options: [true, false] + enhancePrompt: + default: true + description: "If true, the prompt will be enhanced." 
+ options: [true, false] + diff --git a/config/additional_models/image/dalle/imageWorkflow.yaml b/config/additional_models/image/dalle/imageWorkflow.yaml new file mode 100644 index 0000000..1f28e88 --- /dev/null +++ b/config/additional_models/image/dalle/imageWorkflow.yaml @@ -0,0 +1,17 @@ +#For more details see https://platform.openai.com/docs/api-reference/images/create + +image: + steps: + - name: "generate_image" + method: "POST" + url: "https://api.openai.com/v1/images/generations" + headers: + Authorization: "Bearer &OPENAI_API_KEY" + Content-Type: "application/json" + body: + intent_detection_step: true + response_key: "data" + content_extraction: + response_path: "data.0.url" + response_format: "url" + file_extention: "png" diff --git a/config/additional_models/image/dalle/imagegen.yaml b/config/additional_models/image/dalle/imagegen.yaml new file mode 100644 index 0000000..a558e3d --- /dev/null +++ b/config/additional_models/image/dalle/imagegen.yaml @@ -0,0 +1,34 @@ +#Paramaters for OpenAI's Image Generation API +#The APIConfig struct assumes that each required and optional paramater has a description +# a (possibly nonexistent) List of options and a (possibly nonexistent) default val. +#For more details see https://platform.openai.com/docs/api-reference/images/create + +requiredParameters: + prompt: + description: "A text description of the desired image(s)." + options: [] +optionalParameters: + model: + default: "dall-e-2" + description: "The model to use for image generation." + options: ["dall-e-2", "dall-e-3"] + n: + default: 1 + description: "The number of images to generate. Must be between 1 and 10. Dall-e 3 Only supports 1 image per call" + options: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + quality: + default: "standard" + description: "The quality of the generated image. hd Only supported for dall-e-3." + options: ["standard", "hd"] + response_format: + default: "url" + description: "The format in which the generated images are returned." 
+ options: ["url", "b64_json"] + size: + default: "1024x1024" + description: "The size of the generated images." + options: ["256x256", "512x512", "1024x1024", "1024x1024", "1792x1024", "1024x1792"] + style: + default: "vivid" + description: "The style of the generated images. Only supported for dall-e-3." + options: ["vivid", "natural"] \ No newline at end of file diff --git a/config/additional_models/image/deepai/imageWorkflow.yaml b/config/additional_models/image/deepai/imageWorkflow.yaml new file mode 100644 index 0000000..2cd9566 --- /dev/null +++ b/config/additional_models/image/deepai/imageWorkflow.yaml @@ -0,0 +1,18 @@ +#For more details see https://platform.openai.com/docs/api-reference/images/create + +image: + steps: + - name: "generate_image" + method: "POST" + url: "https://api.deepai.org/api/text2img" + headers: + api-key: "&DEEPAI_API_KEY" + Content-Type: "application/json" + body: + intent_detection_step: true + response_key: "data" + response_placeholders: + response_path: "output_url" + response_format: "url" + file_id_path: "id" + file_extention: "jpg" \ No newline at end of file diff --git a/config/additional_models/image/deepai/imagegen.yaml b/config/additional_models/image/deepai/imagegen.yaml new file mode 100644 index 0000000..0f62ca8 --- /dev/null +++ b/config/additional_models/image/deepai/imagegen.yaml @@ -0,0 +1,4 @@ +requiredParameters: + text: + description: "A text description of the desired image(s)." 
+ options: [] \ No newline at end of file diff --git a/config/additional_models/image/leonardo/imageWorkflow.yaml b/config/additional_models/image/leonardo/imageWorkflow.yaml new file mode 100644 index 0000000..3c97691 --- /dev/null +++ b/config/additional_models/image/leonardo/imageWorkflow.yaml @@ -0,0 +1,28 @@ +image: + steps: + - name: "submit_image_generation" + method: "POST" + url: "https://cloud.leonardo.ai/api/rest/v1/generations" + headers: + Authorization: "Bearer &LEONARDO_API_KEY" + Content-Type: "application/json" + body: + intent_detection_step: true + response_placeholders: + generation_id: sdGenerationJob.generationId + + - name: "poll_image_generation_status" + method: "GET" + url: "https://cloud.leonardo.ai/api/rest/v1/generations/{generation_id}" + headers: + Authorization: "Bearer &LEONARDO_API_KEY" + response_placeholders: + status: generations_by_pk.status + image_id: generations_by_pk.generated_images.id + response_format: "url" + file_id_path: "generations_by_pk.generated_images.id" + file_extention: "jpg" + response_path: "generations_by_pk.generated_images.url" + poll: + until: "COMPLETE" + interval: 10 \ No newline at end of file diff --git a/config/additional_models/image/leonardo/imagegen.yaml b/config/additional_models/image/leonardo/imagegen.yaml new file mode 100644 index 0000000..229111f --- /dev/null +++ b/config/additional_models/image/leonardo/imagegen.yaml @@ -0,0 +1,27 @@ +#Paramaters for Leonardos Image Gen API, we need to make an image to then make a gif +#The APIConfig struct assumes that each required and optional paramater has a description +# a (possibly nonexistent) List of options and a (possibly nonexistent) default val. +#For more details see https://docs.leonardo.ai/docs/generate-images-using-leonardo-phoenix-model#sample-output + +requiredParameters: + prompt: + description: "A text description of the desired gif(s)." 
+ options: [] + modelId: + default: "de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3" + options: ["de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3"] + description: "The model to use for image generation, currently Leonardo Phoenix 1.0" + contrast: + default: 2.5 + description: "The contrast of the generated gif, If alchemy is true, contrast needs to be 2.5 or higher." + options: [1.0, 1.3, 1.8, 2.5, 3, 3.5, 4, 4.5] +optionalParameters: + alchemy: + default: false + description: "If true, the output will be generated with an alchemy effect." + options: [true, false] + enhancePrompt: + default: true + description: "If true, the prompt will be enhanced." + options: [true, false] + diff --git a/config/additional_models/intent_detection/gpt/intentdetection.yaml b/config/additional_models/intent_detection/gpt/intentdetection.yaml new file mode 100644 index 0000000..ba2094b --- /dev/null +++ b/config/additional_models/intent_detection/gpt/intentdetection.yaml @@ -0,0 +1,67 @@ +endpoint: "https://api.openai.com/v1/chat/completions" +method: "POST" +headers: + Authorization: "Bearer &OPENAI_API_KEY" + Content-Type: "application/json" +payload: + model: "gpt-4o" + messages: + - role: "system" + content: "%initialPrompt" + - role: "user" + content: "%userPrompt" +initialPrompt: | + "From now on, respond strictly with raw JSON—no explanations, no formatting, and no extra characters. + + + 1. **Determine Content Type**: + - Read the YAML files provided and determine what type of content the user wants to generate. + - If the request relates to generating a **3D model**, set `"ContentType": "model"`. + - If the request is for an **image**, set `"ContentType": "image"`. + - If the request is for a **video**, set `"ContentType": "video"`. + - If the request is for a **GIF**, set `"ContentType": "gif"`. + - If no valid content type is detected, return `"ContentType": "none"`. + + + 2. 
**Extract Required Parameters (Ensure All Are Included)**: + - Every required parameter from the YAML file **must always be present** in `"requiredParameters"`, even if the user does not specify it. + - If a required parameter has a default value in YAML, **always include it in the response.** + - If `"prompt"` is missing from user input, use `"prompt": "A detailed 3D model."` as a fallback default. + - If a required parameter does not have a default and is missing, include it but leave it blank. + + + 3. **Extract Optional Parameters**: + - Populate `"optionalParameters"` only with values that enhance the request. + - If an optional parameter has a default value, **include it only if it improves the request.** + - Use values only from the `"options"` list in YAML. + + + 4. **Ensure Proper Formatting of Required Parameters**: + - `"requiredParameters"` must contain **every field listed in the YAML file** under `requiredParameters`. + - `"mode"` must always be `"preview"` since it has a default value. + - `"prompt"` must always be included and should never be empty. + + + 5. **Exclude Unnecessary Fields**: + - Do not include `endpoint`, `method`, `headers`, `description`, `default`, or `options`. + + + 6. **Output Requirements**: + - Respond with a valid **JSON object**, not a string. + - Do NOT escape quotation marks or return a stringified JSON object. + - Output must always include `"ContentType"`, `"requiredParameters"`, and `"optionalParameters"`. + + + 7. **Fallback Response**: + - If no valid content type or parameters are detected, return: + { + "ContentType": "none", + "requiredParameters": { + "prompt": "none" + }, + "optionalParameters": {} + } + + + Await my next input and respond strictly with raw JSON—no formatting, no markdown, no additional text." 
+responsePath: "choices.0.message.content" \ No newline at end of file diff --git a/config/additional_models/model/meshy/3dWorkflow.yaml b/config/additional_models/model/meshy/3dWorkflow.yaml new file mode 100644 index 0000000..0099e49 --- /dev/null +++ b/config/additional_models/model/meshy/3dWorkflow.yaml @@ -0,0 +1,57 @@ +model: + steps: + - name: "submit_preview_task" + method: "POST" + url: "https://api.meshy.ai/openapi/v2/text-to-3d" + headers: + Authorization: "Bearer &MESHY_API_KEY" + Content-Type: "application/json" + body: + intent_detection_step: true + response_placeholders: + preview_task_id: result # This is the only response we get from the API + + - name: "poll_preview_status" + method: "GET" + url: "https://api.meshy.ai/openapi/v2/text-to-3d/{preview_task_id}" + headers: + Authorization: "Bearer &MESHY_API_KEY" + response_placeholders: + status: status + poll: + until: "SUCCEEDED" + interval: 5 + + - name: "submit_refine_task" + method: "POST" + url: "https://api.meshy.ai/openapi/v2/text-to-3d" + headers: + Authorization: "Bearer &MESHY_API_KEY" + Content-Type: "application/json" + body: + mode: "refine" + preview_task_id: "{preview_task_id}" + response_placeholders: + refine_task_id: result # This is the new task ID we need + + + - name: "poll_refine_status" + method: "GET" + url: "https://api.meshy.ai/openapi/v2/text-to-3d/{refine_task_id}" + headers: + Authorization: "Bearer &MESHY_API_KEY" + response_placeholders: + status: status + poll: + until: "SUCCEEDED" + interval: 5 + + - name: "fetch_final_model" + method: "GET" + url: "https://api.meshy.ai/openapi/v2/text-to-3d/{refine_task_id}" + headers: + Authorization: "Bearer &MESHY_API_KEY" + content_extraction: + response_path: "model_urls.glb" + response_format: "url" + file_extention: "glb" diff --git a/config/additional_models/model/meshy/3dgen.yaml b/config/additional_models/model/meshy/3dgen.yaml new file mode 100644 index 0000000..0f851cd --- /dev/null +++ 
b/config/additional_models/model/meshy/3dgen.yaml @@ -0,0 +1,25 @@ +#Paramaters for OpenAI's Image Generation API +#The APIConfig struct assumes that each required and optional paramater has a description +# a (possibly nonexistent) List of options and a (possibly nonexistent) default val. +#For more details see https://platform.openai.com/docs/api-reference/images/create + +requiredParameters: + mode: + default: "preview" + options: ["preview"] + prompt: + description: "A textual description of the desired 3D model." + options: [] +optionalParameters: + art_style: + description: "Defines the artistic style of the generated model." + options: ["realistic", "sculpture"] + topology: + default: "triangle" + description: "Specifies the topology of the generated model." + options: ["quad", "triangle"] + symmetry_mode: + default: "auto" + description: "Controls symmetry behavior during model generation." + options: ["off", "auto", "on"] + diff --git a/config/additional_models/video/luma/videoWorkflow.yaml b/config/additional_models/video/luma/videoWorkflow.yaml new file mode 100644 index 0000000..42e70dd --- /dev/null +++ b/config/additional_models/video/luma/videoWorkflow.yaml @@ -0,0 +1,28 @@ +video: + steps: + - name: "submit_text_to_video_generation" + method: "POST" + url: "https://api.lumalabs.ai/dream-machine/v1/generations" + headers: + Authorization: "Bearer &LUMA_API_KEY" + Content-Type: "application/json" + body: + intent_detection_step: true + response_placeholders: + generation_id: id + + - name: "poll_video_generation_status" + method: "GET" + url: "https://api.lumalabs.ai/dream-machine/v1/generations/{generation_id}" + headers: + Authorization: "Bearer &LUMA_API_KEY" + response_placeholders: + status: state + poll: + until: "completed" + interval: 10 + content_extraction: + response_path: "assets.video" + response_format: "url" + file_extention: "mp4" + diff --git a/config/additional_models/video/luma/videogen.yaml 
b/config/additional_models/video/luma/videogen.yaml new file mode 100644 index 0000000..ffd2da0 --- /dev/null +++ b/config/additional_models/video/luma/videogen.yaml @@ -0,0 +1,51 @@ +# endpoint: "https://api.lumalabs.ai/dream-machine/v1/generations" + +requiredParameters: + prompt: + description: "A text description of the desired video." + options: [] + model: + default: "ray-2" + resolution: + default: "540p" + duration: + default: "5s" + generation_type: + default: "video" +# optionalParameters: +# width: +# default: 1024 +# description: "Width of the generated video in pixels." +# options: [512, 768, 1024] +# height: +# default: 576 +# description: "Height of the generated video in pixels." +# options: [512, 576, 720] +# duration: +# default: 4 +# description: "Duration of the generated video in seconds." +# options: [1, 2, 3, 4, 5, 6] +# motion: +# default: "cinematic" +# description: "The style of camera motion applied to the video." +# options: ["cinematic", "steady", "handheld"] +# model: +# default: "gen-1" +# description: "The model to use for video generation." +# options: ["gen-1", "gen-2"] +# aspect_ratio: +# default: "16:9" +# description: "Aspect ratio of the generated video." +# options: ["16:9", "9:16", "1:1"] +# fps: +# default: 24 +# description: "Frames per second of the generated video." +# options: [12, 24, 30] +# seed: +# default: null +# description: "Seed for randomization; leave null for random results." +# options: [] +# response_format: +# default: "url" +# description: "The format in which the generated video is returned." 
+# options: ["url"] diff --git a/config/contentgen_workflows/3dWorkflow.yaml b/config/contentgen_workflows/3dWorkflow.yaml index fc26b69..0099e49 100644 --- a/config/contentgen_workflows/3dWorkflow.yaml +++ b/config/contentgen_workflows/3dWorkflow.yaml @@ -4,7 +4,7 @@ model: method: "POST" url: "https://api.meshy.ai/openapi/v2/text-to-3d" headers: - Authorization: "Bearer &THREED_MODEL_API_KEY" + Authorization: "Bearer &MESHY_API_KEY" Content-Type: "application/json" body: intent_detection_step: true @@ -15,7 +15,7 @@ model: method: "GET" url: "https://api.meshy.ai/openapi/v2/text-to-3d/{preview_task_id}" headers: - Authorization: "Bearer &THREED_MODEL_API_KEY" + Authorization: "Bearer &MESHY_API_KEY" response_placeholders: status: status poll: @@ -26,7 +26,7 @@ model: method: "POST" url: "https://api.meshy.ai/openapi/v2/text-to-3d" headers: - Authorization: "Bearer &THREED_MODEL_API_KEY" + Authorization: "Bearer &MESHY_API_KEY" Content-Type: "application/json" body: mode: "refine" @@ -39,7 +39,7 @@ model: method: "GET" url: "https://api.meshy.ai/openapi/v2/text-to-3d/{refine_task_id}" headers: - Authorization: "Bearer &THREED_MODEL_API_KEY" + Authorization: "Bearer &MESHY_API_KEY" response_placeholders: status: status poll: @@ -50,7 +50,7 @@ model: method: "GET" url: "https://api.meshy.ai/openapi/v2/text-to-3d/{refine_task_id}" headers: - Authorization: "Bearer &THREED_MODEL_API_KEY" + Authorization: "Bearer &MESHY_API_KEY" content_extraction: response_path: "model_urls.glb" response_format: "url" diff --git a/config/contentgen_workflows/gifWorkflow.yaml b/config/contentgen_workflows/gifWorkflow.yaml index 4b64015..bd9e00f 100644 --- a/config/contentgen_workflows/gifWorkflow.yaml +++ b/config/contentgen_workflows/gifWorkflow.yaml @@ -4,7 +4,7 @@ gif: method: "POST" url: "https://cloud.leonardo.ai/api/rest/v1/generations" headers: - Authorization: "Bearer &GIF_API_KEY" + Authorization: "Bearer &LEONARDO_API_KEY" Content-Type: "application/json" body: 
intent_detection_step: true @@ -15,7 +15,7 @@ gif: method: "GET" url: "https://cloud.leonardo.ai/api/rest/v1/generations/{generation_id}" headers: - Authorization: "Bearer &GIF_API_KEY" + Authorization: "Bearer &LEONARDO_API_KEY" response_placeholders: status: generations_by_pk.status image_id: generations_by_pk.generated_images.0.id @@ -27,7 +27,7 @@ gif: method: "POST" url: "https://cloud.leonardo.ai/api/rest/v1/generations-motion-svd" headers: - Authorization: "Bearer &GIF_API_KEY" + Authorization: "Bearer &LEONARDO_API_KEY" Content-Type: "application/json" body: imageId: "{image_id}" @@ -38,7 +38,7 @@ gif: method: "GET" url: "https://cloud.leonardo.ai/api/rest/v1/generations/{motion_generation_id}" headers: - Authorization: "Bearer &GIF_API_KEY" + Authorization: "Bearer &LEONARDO_API_KEY" response_placeholders: status: generations_by_pk.status poll: diff --git a/config/contentgen_workflows/imageWorkflow.yaml b/config/contentgen_workflows/imageWorkflow.yaml index c348e18..1f28e88 100644 --- a/config/contentgen_workflows/imageWorkflow.yaml +++ b/config/contentgen_workflows/imageWorkflow.yaml @@ -6,7 +6,7 @@ image: method: "POST" url: "https://api.openai.com/v1/images/generations" headers: - Authorization: "Bearer &IMAGE_API_KEY" + Authorization: "Bearer &OPENAI_API_KEY" Content-Type: "application/json" body: intent_detection_step: true diff --git a/config/contentgen_workflows/videoWorkflow.yaml b/config/contentgen_workflows/videoWorkflow.yaml index e5bf359..42e70dd 100644 --- a/config/contentgen_workflows/videoWorkflow.yaml +++ b/config/contentgen_workflows/videoWorkflow.yaml @@ -4,7 +4,7 @@ video: method: "POST" url: "https://api.lumalabs.ai/dream-machine/v1/generations" headers: - Authorization: "Bearer &VIDEO_API_KEY" + Authorization: "Bearer &LUMA_API_KEY" Content-Type: "application/json" body: intent_detection_step: true @@ -15,7 +15,7 @@ video: method: "GET" url: "https://api.lumalabs.ai/dream-machine/v1/generations/{generation_id}" headers: - 
Authorization: "Bearer &VIDEO_API_KEY" + Authorization: "Bearer &LUMA_API_KEY" response_placeholders: status: state poll: diff --git a/config/intentdetection.yaml b/config/intentdetection.yaml index dfa5bd2..ba2094b 100644 --- a/config/intentdetection.yaml +++ b/config/intentdetection.yaml @@ -1,7 +1,7 @@ endpoint: "https://api.openai.com/v1/chat/completions" method: "POST" headers: - Authorization: "Bearer &INTENT_DETECTION_API_KEY" + Authorization: "Bearer &OPENAI_API_KEY" Content-Type: "application/json" payload: model: "gpt-4o" From 34796ef367bbeb29a91781b89ec40584be521d26 Mon Sep 17 00:00:00 2001 From: Jacob Stella Date: Sun, 6 Apr 2025 13:31:26 -0400 Subject: [PATCH 5/7] updated leo and deep ai workflows to add content extraction features --- .../additional_models/image/deepai/imageWorkflow.yaml | 3 +-- .../image/leonardo/imageWorkflow.yaml | 11 +++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/config/additional_models/image/deepai/imageWorkflow.yaml b/config/additional_models/image/deepai/imageWorkflow.yaml index 2cd9566..bf5e2c1 100644 --- a/config/additional_models/image/deepai/imageWorkflow.yaml +++ b/config/additional_models/image/deepai/imageWorkflow.yaml @@ -11,8 +11,7 @@ image: body: intent_detection_step: true response_key: "data" - response_placeholders: + content_extraction: response_path: "output_url" response_format: "url" - file_id_path: "id" file_extention: "jpg" \ No newline at end of file diff --git a/config/additional_models/image/leonardo/imageWorkflow.yaml b/config/additional_models/image/leonardo/imageWorkflow.yaml index 3c97691..90d3e15 100644 --- a/config/additional_models/image/leonardo/imageWorkflow.yaml +++ b/config/additional_models/image/leonardo/imageWorkflow.yaml @@ -18,11 +18,10 @@ image: Authorization: "Bearer &LEONARDO_API_KEY" response_placeholders: status: generations_by_pk.status - image_id: generations_by_pk.generated_images.id - response_format: "url" - file_id_path: 
"generations_by_pk.generated_images.id" - file_extention: "jpg" - response_path: "generations_by_pk.generated_images.url" poll: until: "COMPLETE" - interval: 10 \ No newline at end of file + interval: 10 + content_extraction: + response_path: "generations_by_pk.generated_images.0.url" + response_format: "url" + file_extention: "jpg" \ No newline at end of file From 5a8e9ef508a346726cd9ad2ff15fc47957d6d7d8 Mon Sep 17 00:00:00 2001 From: Jacob Stella Date: Sun, 6 Apr 2025 13:58:11 -0400 Subject: [PATCH 6/7] fixing keyword, now its hey hollow table --- config/speechtotext.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/speechtotext.yaml b/config/speechtotext.yaml index 414d70b..c34fcbe 100644 --- a/config/speechtotext.yaml +++ b/config/speechtotext.yaml @@ -1,2 +1,2 @@ VoskURL: ws://localhost:2700 -keyword: "hey holo table" +keyword: "hey hollow table" From adb40b79beb8f1b0fc93920dbadfc2aa1ffb7f8b Mon Sep 17 00:00:00 2001 From: Jacob Stella Date: Mon, 7 Apr 2025 11:35:38 -0400 Subject: [PATCH 7/7] improved id propmt --- config/general.yaml | 2 +- config/intentdetection.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/general.yaml b/config/general.yaml index e68b2ca..f0144de 100644 --- a/config/general.yaml +++ b/config/general.yaml @@ -1,5 +1,5 @@ dataDir: "./datastorage/" -openWebsocket: false +openWebsocket: true #log lvl order, from lowest to highest #Trace → Debug → Info → Warn → Error → Fatal → Panic log_level: "Trace" diff --git a/config/intentdetection.yaml b/config/intentdetection.yaml index ba2094b..93ccb0b 100644 --- a/config/intentdetection.yaml +++ b/config/intentdetection.yaml @@ -25,9 +25,9 @@ initialPrompt: | 2. **Extract Required Parameters (Ensure All Are Included)**: - Every required parameter from the YAML file **must always be present** in `"requiredParameters"`, even if the user does not specify it. 
- - If a required parameter has a default value in YAML, **always include it in the response.** - If `"prompt"` is missing from user input, use `"prompt": "A detailed 3D model."` as a fallback default. + - If a required parameter has a default value in YAML, **always include it in the response.** - If a required parameter does not have a default and is missing, include it but leave it blank. + - Always extract the prompt from the user's input and include it in the relevant field. Note that the prompt should never be empty. 3. **Extract Optional Parameters**: