From d66fc58265fea0b272abc2b3e4e4ebbaef57cf7d Mon Sep 17 00:00:00 2001 From: Daryl Date: Mon, 31 Mar 2025 16:28:10 -0400 Subject: [PATCH 1/3] refactor: remove file ID path from content extraction parameters and update related functions --- .DS_Store | Bin 8196 -> 6148 bytes config/contentgen_workflows/3dWorkflow.yaml | 1 - config/contentgen_workflows/gifWorkflow.yaml | 1 - .../contentgen_workflows/imageWorkflow.yaml | 1 - .../contentgen_workflows/videoWorkflow.yaml | 1 - go.mod | 2 ++ go.sum | 2 ++ internal/callers/contentextraction.go | 24 ++++++------------ internal/callers/contentgen.go | 12 ++++----- internal/callers/contentstorage.go | 9 ++++--- 10 files changed, 23 insertions(+), 30 deletions(-) diff --git a/.DS_Store b/.DS_Store index c2242d30b125ba27273a6f78463f603eb5506491..e4cb5b2d7b3f76f6c385dba8bbb65581e3e893d2 100644 GIT binary patch delta 127 zcmZp1XfcprU|?W$DortDU=RQ@Ie-{MGjUEV6q~50$jJth2Z?Qr+RVtvHrYfVbn;8V zh{;QZk~WsTVOh-1!6C>DR09M8+(5z=q<3TCcjn3bGL9g_LAqH$G!w*hu-N8!o;l0_ DR*n=C literal 8196 zcmeHMO=}ZD7=9;TZU}`O6mk(3yvCX|rC54dV|wu5r4c=-#B4WB7dJa4pGXMgtbaxD z>M!xXc+%&cnU>v6au7i+&I|KCvop^-AJ0s8GFu`N?OD(ysu7Wc#8|DsZ85Irv|%|@ zQUwac)3ZpP`;qkV%w^WB0#*U5fK|XMU={fH6u_Cy=Il84b*=SQ0jt1&sQ|wpTqH)% z;98@4b-<}B0I+~!Wyq_a{y>)tfS$p%MwGyW4h8B^VXhd$+#G}sN4;nGYmGXbgz6dX zn4X2Xp$K#F5U!$=@HAR)6|f3yD!_j(1=^;X~f%&+7DhemlM&p~v1@*Ok6-iC3n?T2I^XwShJ!_~diSY}r~NPu#&rT-Dv>@j;{R9=48FtFm+N!K0J2-uq}2%a2M& z66PgFd;0mte?o4@?B?BZ9LabH21n$~g-JF9ir_|09Gn0sbtK~`hD z&s-!%&)`}k7jd~dg@N|`XF;r41#Xc7c|Fz|-~Uf{fB(NlGqOXq3RnexRRK}xbUSTC zsZ)JRG@tS=(mNzJ=B+iV5V+J20X*OQ!w`8FRF3fst~H_rW Date: Wed, 2 Apr 2025 16:10:28 -0400 Subject: [PATCH 2/3] fix: update response paths in workflows and disable websocket connection --- config/contentgen_workflows/gifWorkflow.yaml | 2 +- .../contentgen_workflows/imageWorkflow.yaml | 2 +- config/general.yaml | 2 +- internal/callers/contentextraction.go | 204 ++++++++---------- 4 files changed, 87 insertions(+), 123 deletions(-) diff --git a/config/contentgen_workflows/gifWorkflow.yaml b/config/contentgen_workflows/gifWorkflow.yaml index 8057598..4b64015 100644 --- a/config/contentgen_workflows/gifWorkflow.yaml +++ b/config/contentgen_workflows/gifWorkflow.yaml @@ -45,7 +45,7 @@ gif: until: "COMPLETE" interval: 10 content_extraction: - response_path: "generations_by_pk.generated_images.motionMP4URL" #should be this once 0 index gets fixed generations_by_pk.generated_images.0.motionMP4URL + response_path: "generations_by_pk.generated_images.0.motionMP4URL" #should be this once 0 index gets fixed generations_by_pk.generated_images.0.motionMP4URL response_format: "url" file_extention: "mp4" \ No newline at end of file diff --git a/config/contentgen_workflows/imageWorkflow.yaml b/config/contentgen_workflows/imageWorkflow.yaml index 8f8c46e..c348e18 100644 --- a/config/contentgen_workflows/imageWorkflow.yaml +++ b/config/contentgen_workflows/imageWorkflow.yaml @@ -12,6 +12,6 @@ image: intent_detection_step: true response_key: "data" content_extraction: - response_path: "data.url" + response_path: "data.0.url" response_format: "url" file_extention: "png" diff --git a/config/general.yaml b/config/general.yaml index 721fd10..bebd1ea 100644 --- a/config/general.yaml +++ b/config/general.yaml @@ -1,5 +1,5 @@ dataDir: "./datastorage/" -openWebsocket: true +openWebsocket: false #log lvl order, from lowest to highest #Trace → Debug → Info → Warn → Error → Fatal → Panic log_level: "Trace" diff --git a/internal/callers/contentextraction.go b/internal/callers/contentextraction.go index 24fe612..583783a 100644 --- a/internal/callers/contentextraction.go +++ b/internal/callers/contentextraction.go @@ -2,57 +2,77 @@ package callers import ( "encoding/json" - "errors" "fmt" - "strconv" "strings" "github.com/METIL-HoloAI/HoloTable-Middleware/internal/config" + "github.com/sirupsen/logrus" ) // ContentExtraction extracts content from the response input based on the data type. -// The response can be either a JSON string or a mapped input (already parsed JSON). func ContentExtraction(response interface{}, dataType string) (string, string, string, error) { - var jsonData interface{} - switch v := response.(type) { - case string: - // If the response is a string, assume it is a JSON string and unmarshal it. - if err := json.Unmarshal([]byte(v), &jsonData); err != nil { - return "", "", "", err - } - default: - // Otherwise, assume it's already a parsed map/slice. - jsonData = response + // Parse the response into a JSON-compatible structure. + jsonData, err := parseResponse(response) + if err != nil { + logrus.Errorf("Failed to parse response: %v", err) + return "", "", "", err } - var responseFormat, responsePath, fileType string - - // Select configuration parameters based on the provided data type. - lastStep := len(config.Workflows[dataType].Steps) - 1 - switch dataType { - case "image", "video", "gif", "model": - responseFormat, responsePath, fileType = getConfigParams(dataType, lastStep) - default: - return "", "", "", errors.New("unknown data type: " + dataType) + // Retrieve configuration parameters for the given data type. + configParams, err := getConfigParams(dataType) + if err != nil { + logrus.Errorf("Failed to retrieve config params for data type '%s': %v", dataType, err) + return "", "", "", err } - // Extract the data (URL or raw data string). - dataExtracted, err := extractValueFromData(jsonData, responsePath) + // Extract the data (URL or raw data string) using the response path. + dataExtracted, err := extractValueFromData(jsonData, configParams.responsePath) if err != nil { - return "", responseFormat, "", err + logrus.Errorf("Failed to extract value from data using path '%s': %v", configParams.responsePath, err) + return "", configParams.responseFormat, "", err } + println("data:", dataExtracted) - // Extract file ID if a file_id_path is provided. + return dataExtracted, configParams.responseFormat, configParams.fileType, nil +} - return dataExtracted, responseFormat, fileType, nil +// parseResponse parses the response into a JSON-compatible structure. +func parseResponse(response interface{}) (interface{}, error) { + switch v := response.(type) { + case string: + var jsonData interface{} + if err := json.Unmarshal([]byte(v), &jsonData); err != nil { + logrus.Errorf("Failed to unmarshal JSON string: %v", err) + return nil, err + } + return jsonData, nil + default: + return response, nil + } } -// getConfigParams retrieves configuration parameters for the given data type and step index. -func getConfigParams(dataType string, stepIndex int) (string, string, string) { - workflow := config.Workflows[dataType].Steps[stepIndex].ContentExtraction - return getStringFromMap(workflow, "response_format"), - getStringFromMap(workflow, "response_path"), - getStringFromMap(workflow, "file_extention") +// ConfigParams holds configuration parameters for content extraction. +type ConfigParams struct { + responseFormat string + responsePath string + fileType string +} + +// getConfigParams retrieves configuration parameters for the given data type. +func getConfigParams(dataType string) (*ConfigParams, error) { + workflow, exists := config.Workflows[dataType] + if !exists || len(workflow.Steps) == 0 { + err := fmt.Errorf("unknown or invalid data type: %s", dataType) + logrus.Error(err) + return nil, err + } + + lastStep := workflow.Steps[len(workflow.Steps)-1].ContentExtraction + return &ConfigParams{ + responseFormat: getStringFromMap(lastStep, "response_format"), + responsePath: getStringFromMap(lastStep, "response_path"), + fileType: getStringFromMap(lastStep, "file_extention"), + }, nil } // getStringFromMap safely retrieves a string value from a map. @@ -60,111 +80,55 @@ func getStringFromMap(m map[string]interface{}, key string) string { if val, ok := m[key].(string); ok { return val } + logrus.Warnf("Key '%s' not found or not a string in map", key) return "" } -// extractValueFromData traverses the JSON data using the provided JSON path (dot-separated) -// and returns the final value as a string. If the final value is not a string, it converts it. +// extractValueFromData traverses the JSON data using the provided JSON path. func extractValueFromData(data interface{}, responsePath string) (string, error) { - // If the input data is already a map and doesn't contain a "response" key, - // remove the "response." prefix from the responsePath. - if m, ok := data.(map[string]interface{}); ok { - if _, exists := m["response"]; !exists { - responsePath = strings.TrimPrefix(responsePath, "response.") - } - } - - // Handle misconfigured response paths. - // If the responsePath equals "Extracted URL:" (as seen in your error), - // override it with the expected key path for your mapped input. - if responsePath == "Extracted URL:" { - responsePath = "data[0].url" - } - parts := strings.Split(responsePath, ".") current := data - var err error + for _, part := range parts { + var err error current, err = navigateJSON(current, part) if err != nil { + logrus.Errorf("Failed to navigate JSON for part '%s': %v", part, err) return "", err } } - - // If the final value is not a string, convert it to one. - if str, ok := current.(string); ok { - return str, nil - } + // Convert the final value to a string if necessary. return fmt.Sprintf("%v", current), nil } // navigateJSON navigates through the JSON data based on a path segment. -// It supports simple keys and, if no explicit array index is provided, -// automatically selects the first element when encountering an array. -func navigateJSON(current interface{}, part string) (interface{}, error) { - // If current is an array and the path segment does not start with an explicit index, - // automatically select the first element. - if len(part) > 0 && part[0] != '[' { - if arr, ok := current.([]interface{}); ok { - if len(arr) == 0 { - return nil, errors.New("array is empty when processing key: " + part) - } - current = arr[0] - } - } - - // If the segment contains an explicit array index, process it. - if idx := strings.Index(part, "["); idx != -1 { - // Process the key portion before the '[' (if any). - key := part[:idx] - if key != "" { - m, ok := current.(map[string]interface{}) - if !ok { - return nil, errors.New("expected JSON object for key: " + key) +func navigateJSON(data interface{}, path string) (interface{}, error) { + keys := strings.Split(path, ".") // Split "choices.0.message.content" into ["choices", "0", "message", "content"] + var current interface{} = data // Used to keep track of where we are in the JSON structure + + for _, key := range keys { + switch v := current.(type) { + case map[string]interface{}: // Check if key exists in the map + if val, exists := v[key]; exists { + current = val // If so, update current to the value of the key + } else { + err := fmt.Errorf("key '%s' not found in map", key) + logrus.Error(err) + return nil, err } - var exists bool - current, exists = m[key] - if !exists { - return nil, errors.New("key not found: " + key) + case []interface{}: // Handle array indexing + index, err := parseIndex(key) // Convert string to int + if err != nil || index < 0 || index >= len(v) { + err := fmt.Errorf("invalid array index '%s': %v", key, err) + logrus.Error(err) + return nil, err } + current = v[index] // Update current to array element + default: + err := fmt.Errorf("unexpected type encountered while navigating JSON at key '%s'", key) + logrus.Error(err) + return nil, err } - - // Process all array indices in the part. - for { - start := strings.Index(part, "[") - if start == -1 { - break - } - end := strings.Index(part, "]") - if end == -1 { - return nil, errors.New("malformed array index in part: " + part) - } - indexStr := part[start+1 : end] - arrIdx, err := strconv.Atoi(indexStr) - if err != nil { - return nil, errors.New("invalid array index: " + indexStr) - } - arr, ok := current.([]interface{}) - if !ok { - return nil, errors.New("expected JSON array when processing index: " + indexStr) - } - if arrIdx < 0 || arrIdx >= len(arr) { - return nil, errors.New("array index out of range: " + indexStr) - } - current = arr[arrIdx] - part = part[end+1:] - } - return current, nil - } - - // Otherwise, treat part as a simple key. - m, ok := current.(map[string]interface{}) - if !ok { - return nil, errors.New("expected JSON object for key: " + part) - } - val, exists := m[part] - if !exists { - return nil, errors.New("key not found: " + part) } - return val, nil + return current, nil } From 4d80ec64b07cfc4bde2d9533561d8555b8fabbce Mon Sep 17 00:00:00 2001 From: Jacob Stella Date: Sat, 5 Apr 2025 14:15:40 -0400 Subject: [PATCH 3/3] pass fileID back to contentgen for unity display --- internal/callers/contentgen.go | 3 +-- internal/callers/contentstorage.go | 21 +++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/internal/callers/contentgen.go b/internal/callers/contentgen.go index d558128..02d04ce 100644 --- a/internal/callers/contentgen.go +++ b/internal/callers/contentgen.go @@ -119,8 +119,7 @@ func HandleWorkflow(intentDetectionResponse structs.IntentDetectionResponse, wor } //fmt.Println("Extracted URL:", extractedURL) - - _, filePath, err := ContentStorage(intentDetectionResponse.ContentType, extractedFormat, fileExtention, []byte(extractedURL)) + dataBytes, filePath, fileID, err := ContentStorage(intentDetectionResponse.ContentType, extractedFormat, fileExtention, []byte(extractedURL)) if err != nil { fmt.Printf("Storage failed: %v", err) diff --git a/internal/callers/contentstorage.go b/internal/callers/contentstorage.go index ecf81ea..5196e8c 100644 --- a/internal/callers/contentstorage.go +++ b/internal/callers/contentstorage.go @@ -21,7 +21,7 @@ const filePerm = 0644 // ContentStorage saves the content to local storage under a subdirectory based on the file type. // If the provided content represents a URL (i.e. format == "url"), the function downloads the file from that URL before storing it. // It assumes that the provided filename already includes the proper file extension. -func ContentStorage(fileType, format, fileExtention string, content []byte) ([]byte, string, error) { +func ContentStorage(fileType, format, fileExtention string, content []byte) ([]byte, string, string, error) { // Map file types to database table names. tableMap := map[string]string{ "image": "image", @@ -33,13 +33,14 @@ func ContentStorage(fileType, format, fileExtention string, content []byte) ([]b // Get the corresponding database table name. tableName, ok := tableMap[fileType] if !ok { - return nil, "", fmt.Errorf("invalid file type: %s", fileType) + return nil, "", "", fmt.Errorf("invalid file type: %s", fileType) } // Combine fileID and fileExtention into a single file name. - fileName := uuid.New().String() + fileID := uuid.New().String() + fileName := "" if fileExtention != "" { - fileName = fmt.Sprintf("%s.%s", uuid.New().String(), fileExtention) + fileName = fmt.Sprintf("%s.%s", fileID, fileExtention) } // If the format indicates that the content is a URL, download the file. @@ -47,7 +48,7 @@ func ContentStorage(fileType, format, fileExtention string, content []byte) ([]b var err error content, err = downloadContent(string(content)) if err != nil { - return nil, "", err + return nil, "", "", err } } @@ -55,19 +56,19 @@ func ContentStorage(fileType, format, fileExtention string, content []byte) ([]b directory := filepath.Join(config.General.DataDir, "/content", tableName) // Ensure the directory exists. if err := os.MkdirAll(directory, os.ModePerm); err != nil { - return nil, "", fmt.Errorf("failed to create directory: %v", err) + return nil, "", "", fmt.Errorf("failed to create directory: %v", err) } // Create the full file path. filePath := filepath.Join(directory, fileName) // Write the content to the file. if err := os.WriteFile(filePath, content, filePerm); err != nil { - return nil, "", fmt.Errorf("failed to write file: %v", err) + return nil, "", "", fmt.Errorf("failed to write file: %v", err) } // Insert a record into the database. if err := database.Insert(tableName, fileName, filePath); err != nil { - return nil, "", fmt.Errorf("failed to insert record into database: %v", err) + return nil, "", "", fmt.Errorf("failed to insert record into database: %v", err) } // filePath = "my_file.txt" // Could be a relative or absolute path @@ -75,12 +76,12 @@ func ContentStorage(fileType, format, fileExtention string, content []byte) ([]b absPath, err := filepath.Abs(filePath) if err != nil { fmt.Println("Error getting absolute path:", err) - return nil, "", err + return nil, "", "", err } fmt.Println("Absolute path:", absPath) - return content, absPath, nil + return content, absPath, fileID, nil } // downloadContent downloads the content from the given URL and returns the downloaded data.