METIL-HoloAI · Jacob-Stella · Apr 7, 2025 · Apr 5, 2025 · Apr 5, 2025 · Apr 5, 2025
diff --git a/.env.example b/.env.example
@@ -1,6 +1,7 @@
 # API Keys for AI models
-INTENT_DETECTION_API_KEY: FILL_ME_IN
-VIDEO_API_KEY: FILL_ME_IN
-IMAGE_API_KEY: FILL_ME_IN
-THREED_MODEL_API_KEY: FILL_ME_IN
-GIF_API_KEY: FILL_ME_IN
+# API key names are arbitrary, as long as each name matches the `&NAME` reference (e.g. `&OPENAI_API_KEY`) used in the corresponding workflow file
+OPENAI_API_KEY: FILL_ME_IN
+LUMA_API_KEY: FILL_ME_IN
+MESHY_API_KEY: FILL_ME_IN
+LEONARDO_API_KEY: FILL_ME_IN
+DEEPAI_API_KEY: FILL_ME_IN
diff --git a/config/additional_models/gif/leonardo/gifWorkflow.yaml b/config/additional_models/gif/leonardo/gifWorkflow.yaml
@@ -0,0 +1,51 @@
+gif:  # Leonardo gif workflow: generate a still image, then submit it for motion generation
+  steps:
+    - name: "submit_image_generation"  # step 1: request the still image
+      method: "POST"
+      url: "https://cloud.leonardo.ai/api/rest/v1/generations"
+      headers:
+        Authorization: "Bearer &LEONARDO_API_KEY"  # &NAME presumably resolves to the matching .env entry - confirm
+        Content-Type: "application/json"
+      body:
+        intent_detection_step: true  # presumably: body is filled from the intent-detection output - confirm against the loader
+      response_placeholders:
+        generation_id: sdGenerationJob.generationId  # stored for use as {generation_id} in the next step
+
+    - name: "poll_image_generation_status"  # step 2: wait for the still image
+      method: "GET"
+      url: "https://cloud.leonardo.ai/api/rest/v1/generations/{generation_id}"
+      headers:
+        Authorization: "Bearer &LEONARDO_API_KEY"
+      response_placeholders:
+        status: generations_by_pk.status
+        image_id: generations_by_pk.generated_images.0.id  # stored for use as {image_id} in the motion request
+      poll:
+        until: "COMPLETE"  # presumably compared against the status placeholder above - confirm
+        interval: 10  # NOTE(review): unit not visible here, presumably seconds - confirm
+
+    - name: "submit_motion_generation"  # step 3: request motion generation for the still image
+      method: "POST"
+      url: "https://cloud.leonardo.ai/api/rest/v1/generations-motion-svd"
+      headers:
+        Authorization: "Bearer &LEONARDO_API_KEY"
+        Content-Type: "application/json"
+      body:
+        imageId: "{image_id}"  # placeholder captured in poll_image_generation_status
+      response_placeholders:
+        motion_generation_id: motionSvdGenerationJob.generationId  # stored for use as {motion_generation_id} below
+
+    - name: "poll_motion_generation_status"  # step 4: wait for the motion job, then extract the result URL
+      method: "GET"
+      url: "https://cloud.leonardo.ai/api/rest/v1/generations/{motion_generation_id}"
+      headers:
+        Authorization: "Bearer &LEONARDO_API_KEY"
+      response_placeholders:
+        status: generations_by_pk.status
+      poll:
+        until: "COMPLETE"
+        interval: 10
+      content_extraction:
+        response_path: "generations_by_pk.generated_images.0.motionMP4URL"  # NOTE(review): an earlier note implied numeric index ("0") support was pending - confirm this path resolves
+        response_format: "url"
+        file_extention: "mp4"  # key is spelled "file_extention" in every workflow file; keep it in sync with the loader
+
diff --git a/config/additional_models/gif/leonardo/gifgen.yaml b/config/additional_models/gif/leonardo/gifgen.yaml
@@ -0,0 +1,27 @@
+# Parameters for Leonardo's image generation API; an image is generated first and then turned into a gif.
+# The APIConfig struct assumes that each required and optional parameter has a description,
+# a (possibly nonexistent) list of options, and a (possibly nonexistent) default value.
+# For more details see https://docs.leonardo.ai/docs/generate-images-using-leonardo-phoenix-model#sample-output
+
+requiredParameters:
+  prompt:
+    description: "A text description of the desired gif(s)."
+    options: []  # no predefined choices
+  modelId:
+    default: "de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3"
+    options: ["de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3"]  # single allowed value, identical to the default
+    description: "The model to use for image generation, currently Leonardo Phoenix 1.0"
+  contrast:
+    default: 2.5
+    description: "The contrast of the generated gif, If alchemy is true, contrast needs to be 2.5 or higher."
+    options: [1.0, 1.3, 1.8, 2.5, 3, 3.5, 4, 4.5]
+optionalParameters:
+  alchemy:
+    default: false
+    description: "If true, the output will be generated with an alchemy effect."
+    options: [true, false]
+  enhancePrompt:
+    default: true
+    description: "If true, the prompt will be enhanced."
+    options: [true, false]
+
diff --git a/config/additional_models/image/dalle/imageWorkflow.yaml b/config/additional_models/image/dalle/imageWorkflow.yaml
@@ -0,0 +1,17 @@
+# For more details see https://platform.openai.com/docs/api-reference/images/create
+
+image:  # single-step workflow: one POST, then the image URL is read from the response
+  steps:
+    - name: "generate_image"
+      method: "POST"
+      url: "https://api.openai.com/v1/images/generations"
+      headers:
+        Authorization: "Bearer &OPENAI_API_KEY"  # &NAME presumably resolves to the matching .env entry - confirm
+        Content-Type: "application/json"
+      body:
+        intent_detection_step: true  # presumably: body is filled from the intent-detection output - confirm against the loader
+      response_key: "data"  # NOTE(review): how response_key interacts with response_path is not visible here - confirm
+      content_extraction:
+        response_path: "data.0.url"  # presumably the url field of the first entry under "data"
+        response_format: "url"
+        file_extention: "png"  # key is spelled "file_extention" in every workflow file; keep it in sync with the loader
diff --git a/config/additional_models/image/dalle/imagegen.yaml b/config/additional_models/image/dalle/imagegen.yaml
@@ -0,0 +1,34 @@
+# Parameters for OpenAI's Image Generation API
+# The APIConfig struct assumes that each required and optional parameter has a description,
+# a (possibly nonexistent) list of options, and a (possibly nonexistent) default value.
+# For more details see https://platform.openai.com/docs/api-reference/images/create
+
+requiredParameters:
+  prompt:
+    description: "A text description of the desired image(s)."
+    options: []
+optionalParameters:
+  model:
+    default: "dall-e-2"
+    description: "The model to use for image generation."
+    options: ["dall-e-2", "dall-e-3"]
+  n:
+    default: 1
+    description: "The number of images to generate. Must be between 1 and 10. Dall-e 3 Only supports 1 image per call"
+    options: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+  quality:
+    default: "standard"
+    description: "The quality of the generated image. hd Only supported for dall-e-3."
+    options: ["standard", "hd"]
+  response_format:
+    default: "url"
+    description: "The format in which the generated images are returned. Must be url so the workflow can download the result."
+    options: ["url"]  # b64_json removed: the dalle workflow extracts data.0.url, which a b64_json response does not contain
+  size:
+    default: "1024x1024"
+    description: "The size of the generated images. dall-e-2 supports 256x256, 512x512 and 1024x1024; dall-e-3 supports 1024x1024, 1792x1024 and 1024x1792."
+    options: ["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]  # each size listed once; validity per model is stated in the description
+  style:
+    default: "vivid"
+    description: "The style of the generated images. Only supported for dall-e-3."
+    options: ["vivid", "natural"]
diff --git a/config/additional_models/image/deepai/imageWorkflow.yaml b/config/additional_models/image/deepai/imageWorkflow.yaml
@@ -0,0 +1,17 @@
+# For more details see https://deepai.org/machine-learning-model/text2img
+
+image:  # single-step workflow: one POST, then the image URL is read from the response
+  steps:
+    - name: "generate_image"
+      method: "POST"
+      url: "https://api.deepai.org/api/text2img"
+      headers:
+        api-key: "&DEEPAI_API_KEY"  # raw key in an api-key header (no "Bearer " prefix, unlike the other workflows)
+        Content-Type: "application/json"
+      body:
+        intent_detection_step: true  # presumably: body is filled from the intent-detection output - confirm against the loader
+      response_key: "data"  # NOTE(review): response_path below is the top-level output_url, so the role of "data" here is unclear - confirm
+      content_extraction:
+        response_path: "output_url"
+        response_format: "url"
+        file_extention: "jpg"  # key is spelled "file_extention" in every workflow file; keep it in sync with the loader
diff --git a/config/additional_models/image/deepai/imagegen.yaml b/config/additional_models/image/deepai/imagegen.yaml
@@ -0,0 +1,4 @@
+requiredParameters:  # parameter schema for the DeepAI image model; no optionalParameters are declared
+  text:  # this config names its prompt field "text", not "prompt" as the other model configs do
+    description: "A text description of the desired image(s)."
+    options: []  # no predefined choices
diff --git a/config/additional_models/image/leonardo/imageWorkflow.yaml b/config/additional_models/image/leonardo/imageWorkflow.yaml
@@ -0,0 +1,27 @@
+image:  # Leonardo image workflow: submit a generation job, then poll it and extract the image URL
+  steps:
+    - name: "submit_image_generation"  # step 1: request the image
+      method: "POST"
+      url: "https://cloud.leonardo.ai/api/rest/v1/generations"
+      headers:
+        Authorization: "Bearer &LEONARDO_API_KEY"  # &NAME presumably resolves to the matching .env entry - confirm
+        Content-Type: "application/json"
+      body:
+        intent_detection_step: true  # presumably: body is filled from the intent-detection output - confirm against the loader
+      response_placeholders:
+        generation_id: sdGenerationJob.generationId  # stored for use as {generation_id} in the next step
+
+    - name: "poll_image_generation_status"  # step 2: wait for the job, then extract the result URL
+      method: "GET"
+      url: "https://cloud.leonardo.ai/api/rest/v1/generations/{generation_id}"
+      headers:
+        Authorization: "Bearer &LEONARDO_API_KEY"
+      response_placeholders:
+        status: generations_by_pk.status
+      poll:
+        until: "COMPLETE"  # presumably compared against the status placeholder above - confirm
+        interval: 10  # NOTE(review): unit not visible here, presumably seconds - confirm
+      content_extraction:
+        response_path: "generations_by_pk.generated_images.0.url"  # presumably the url of the first generated image
+        response_format: "url"
+        file_extention: "jpg"  # key is spelled "file_extention" in every workflow file; keep it in sync with the loader
diff --git a/config/additional_models/image/leonardo/imagegen.yaml b/config/additional_models/image/leonardo/imagegen.yaml
@@ -0,0 +1,27 @@
+# Parameters for Leonardo's image generation API (still images).
+# The APIConfig struct assumes that each required and optional parameter has a description,
+# a (possibly nonexistent) list of options, and a (possibly nonexistent) default value.
+# For more details see https://docs.leonardo.ai/docs/generate-images-using-leonardo-phoenix-model#sample-output
+
+requiredParameters:
+  prompt:
+    description: "A text description of the desired image(s)."  # was "gif(s)", copied from the gif config
+    options: []
+  modelId:
+    default: "de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3"
+    options: ["de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3"]  # single allowed value, identical to the default
+    description: "The model to use for image generation, currently Leonardo Phoenix 1.0"
+  contrast:
+    default: 2.5
+    description: "The contrast of the generated image. If alchemy is true, contrast needs to be 2.5 or higher."
+    options: [1.0, 1.3, 1.8, 2.5, 3, 3.5, 4, 4.5]
+optionalParameters:
+  alchemy:
+    default: false
+    description: "If true, the output will be generated with an alchemy effect."
+    options: [true, false]
+  enhancePrompt:
+    default: true
+    description: "If true, the prompt will be enhanced."
+    options: [true, false]
+
diff --git a/config/additional_models/intent_detection/gpt/intentdetection.yaml b/config/additional_models/intent_detection/gpt/intentdetection.yaml
@@ -0,0 +1,67 @@
+endpoint: "https://api.openai.com/v1/chat/completions"  # chat completions endpoint used for intent detection
+method: "POST"
+headers:
+  Authorization: "Bearer &OPENAI_API_KEY"  # &NAME presumably resolves to the matching .env entry - confirm
+  Content-Type: "application/json"
+payload:
+  model: "gpt-4o"
+  messages:
+    - role: "system"
+      content: "%initialPrompt"  # presumably replaced with the initialPrompt value defined in this file - confirm
+    - role: "user"
+      content: "%userPrompt"  # presumably replaced with the end user's request text - confirm
+initialPrompt: |  # literal block scalar: the opening and closing double quotes below are part of the prompt text
+  "From now on, respond strictly with raw JSON—no explanations, no formatting, and no extra characters.
+
+
+  1. **Determine Content Type**:  
+     - Read the YAML files provided and determine what type of content the user wants to generate.
+     - If the request relates to generating a **3D model**, set `"ContentType": "model"`.  
+     - If the request is for an **image**, set `"ContentType": "image"`.  
+     - If the request is for a **video**, set `"ContentType": "video"`.  
+     - If the request is for a **GIF**, set `"ContentType": "gif"`.  
+     - If no valid content type is detected, return `"ContentType": "none"`.
+
+
+  2. **Extract Required Parameters (Ensure All Are Included)**:  
+     - Every required parameter from the YAML file **must always be present** in `"requiredParameters"`, even if the user does not specify it.  
+     - If a required parameter has a default value in YAML, **always include it in the response.**  
+     - If `"prompt"` is required but missing from user input, fall back to a generic description of the requested content type (e.g. `"prompt": "A detailed 3D model."` for a model request).  
+     - If a required parameter does not have a default and is missing, include it but leave it blank.
+
+
+  3. **Extract Optional Parameters**:  
+     - Populate `"optionalParameters"` only with values that enhance the request.  
+     - If an optional parameter has a default value, **include it only if it improves the request.**  
+     - Use values only from the `"options"` list in YAML.  
+
+
+  4. **Ensure Proper Formatting of Required Parameters**:  
+     - `"requiredParameters"` must contain **every field listed in the YAML file** under `requiredParameters`.  
+     - If `"mode"` is listed under `requiredParameters` in the YAML file, it must always be `"preview"` since it has a default value.  
+     - `"prompt"`, when it is a required parameter, must always be included and should never be empty.
+
+
+  5. **Exclude Unnecessary Fields**:  
+     - Do not include `endpoint`, `method`, `headers`, `description`, `default`, or `options`.
+
+
+  6. **Output Requirements**:  
+     - Respond with a valid **JSON object**, not a string.  
+     - Do NOT escape quotation marks or return a stringified JSON object.  
+     - Output must always include `"ContentType"`, `"requiredParameters"`, and `"optionalParameters"`.  
+
+
+  7. **Fallback Response**:  
+     - If no valid content type or parameters are detected, return:  
+       {
+         "ContentType": "none",
+         "requiredParameters": {
+           "prompt": "none"
+         },
+         "optionalParameters": {}
+       }
+
+
+  Await my next input and respond strictly with raw JSON—no formatting, no markdown, no additional text."
+responsePath: "choices.0.message.content"  # presumably the dotted path to the reply text in the API response - confirm
diff --git a/config/additional_models/model/meshy/3dWorkflow.yaml b/config/additional_models/model/meshy/3dWorkflow.yaml
@@ -0,0 +1,57 @@
+model:  # Meshy text-to-3D workflow: preview task, then refine task, then fetch the GLB URL
+  steps:
+    - name: "submit_preview_task"  # step 1: create the preview task
+      method: "POST"
+      url: "https://api.meshy.ai/openapi/v2/text-to-3d"
+      headers:
+        Authorization: "Bearer &MESHY_API_KEY"  # &NAME presumably resolves to the matching .env entry - confirm
+        Content-Type: "application/json"
+      body:
+        intent_detection_step: true  # presumably: body is filled from the intent-detection output - confirm against the loader
+      response_placeholders:
+        preview_task_id: result  # This is the only response we get from the API
+
+    - name: "poll_preview_status"  # step 2: wait for the preview task
+      method: "GET"
+      url: "https://api.meshy.ai/openapi/v2/text-to-3d/{preview_task_id}"
+      headers:
+        Authorization: "Bearer &MESHY_API_KEY"
+      response_placeholders:
+        status: status
+      poll:
+        until: "SUCCEEDED"  # presumably compared against the status placeholder above - confirm
+        interval: 5  # NOTE(review): unit not visible here, presumably seconds - confirm
+
+    - name: "submit_refine_task"  # step 3: create the refine task from the finished preview
+      method: "POST"
+      url: "https://api.meshy.ai/openapi/v2/text-to-3d"
+      headers:
+        Authorization: "Bearer &MESHY_API_KEY"
+        Content-Type: "application/json"
+      body:
+        mode: "refine"  # fixed body here, unlike step 1 which uses intent_detection_step
+        preview_task_id: "{preview_task_id}"
+      response_placeholders:
+        refine_task_id: result  # This is the new task ID we need
+
+
+    - name: "poll_refine_status"  # step 4: wait for the refine task
+      method: "GET"
+      url: "https://api.meshy.ai/openapi/v2/text-to-3d/{refine_task_id}"
+      headers:
+        Authorization: "Bearer &MESHY_API_KEY"
+      response_placeholders:
+        status: status
+      poll:
+        until: "SUCCEEDED"
+        interval: 5
+
+    - name: "fetch_final_model"  # step 5: same URL as poll_refine_status, no polling; only extracts the model URL
+      method: "GET"
+      url: "https://api.meshy.ai/openapi/v2/text-to-3d/{refine_task_id}"
+      headers:
+        Authorization: "Bearer &MESHY_API_KEY"
+      content_extraction:
+        response_path: "model_urls.glb"
+        response_format: "url"
+        file_extention: "glb"  # key is spelled "file_extention" in every workflow file; keep it in sync with the loader
diff --git a/config/additional_models/model/meshy/3dgen.yaml b/config/additional_models/model/meshy/3dgen.yaml
@@ -0,0 +1,25 @@
+# Parameters for Meshy's Text to 3D API
+# The APIConfig struct assumes that each required and optional parameter has a description,
+# a (possibly nonexistent) list of options, and a (possibly nonexistent) default value.
+# For more details see the Text to 3D API reference at https://docs.meshy.ai
+
+requiredParameters:
+  mode:
+    default: "preview"
+    description: "The generation stage to request. Always preview; the workflow submits the refine stage itself."
+    options: ["preview"]
+  prompt:
+    description: "A textual description of the desired 3D model."
+    options: []
+optionalParameters:
+  art_style:
+    description: "Defines the artistic style of the generated model."
+    options: ["realistic", "sculpture"]
+  topology:
+    default: "triangle"
+    description: "Specifies the topology of the generated model."
+    options: ["quad", "triangle"]
+  symmetry_mode:
+    default: "auto"
+    description: "Controls symmetry behavior during model generation."
+    options: ["off", "auto", "on"]