From 22305fe6e3be94e6268e8ce8775ce25bf5ba5574 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Wed, 7 Aug 2024 13:21:07 -0300 Subject: [PATCH] feat: Implement graph combination method (#3224) * feat(utils.py): add escape_json_dump function to escape JSON strings for Edge dictionaries * refactor(Output): streamline add_types method to prevent duplicate entries in types list for improved type management * feat(data.py): add classmethod decorator to validate_data for enhanced validation logic when checking data types * feat(setup.py): implement retry logic for loading starter projects to enhance robustness against JSON decode errors * fix(input_mixin.py): improve model_config formatting and update field_type alias for clarity and consistency in field definitions * feat(types.py): refactor vertex constructors to use NodeData and add input/output methods for better component interaction * feat(schema.py): add NodeData and Position TypedDicts for improved type safety and structure in vertex data handling * feat(base.py): update Vertex to use NodeData type and add to_data method for better data management and access * refactor(schema.py): update TargetHandle and SourceHandle models to include model_config attribute * Add TypedDict classes for graph schema serialization in `schema.py` * Refactor `Edge` class to improve handle validation and data handling - Consolidated imports and removed redundant `BaseModel` definitions for `SourceHandle` and `TargetHandle`. - Added `valid_handles`, `target_param`, and `_target_handle` attributes to `Edge` class. - Enhanced handle validation logic to distinguish between dictionary and string types. - Introduced `to_data` method to return edge data. - Updated attribute names to follow consistent naming conventions (`base_classes`, `input_types`, `field_name`). 
* Refactor `Edge` class to improve handle validation and data handling * Refactor: Standardize attribute naming and add `to_data` method in Edge class - Renamed attributes to use snake_case consistently (`baseClasses` to `base_classes`, `inputTypes` to `input_types`, `fieldName` to `field_name`). - Added `to_data` method to return `_data` attribute. - Updated validation methods to use new attribute names. * Refactor: Update Edge class to consistently use snake_case for attributes and improve validation logic for handles * Refactor: Change node argument type in add_node and _create_vertex methods to NodeData for better type safety and clarity * Refactor: Implement JSON serialization for graph data with `dumps` and `dump` methods, enhancing data export capabilities * Refactor: Add pytest fixtures for ingestion and RAG graphs, enhance test structure for better clarity and organization * Refactor: Add pytest fixtures for memory_chatbot_graph tests and improve test structure * Refactor: Remove unused methods in ComponentVertex class to streamline code and improve readability * Refactor: Remove unnecessary line in ComponentVertex class to enhance code clarity and maintainability * refactor: Add utility functions for getting handle IDs in CustomNodes - Added `getRightHandleId` function to generate the right handle ID for source handles. - Added `getLeftHandleId` function to generate the left handle ID for target handles. - These functions improve code readability and maintainability by encapsulating the logic for generating handle IDs. 
* refactor: Add type for escaped handle IDs in edges to improve type safety in reactflowUtils * feat: Add function to escape handle IDs in edges, enhancing edge management in reactflowUtils * feat: Add function to check edges without escaped handle IDs, improving edge validation in reactflowUtils * feat: Enhance edge processing in reactflowUtils to handle edges without escaped handle IDs more effectively * feat: Add layoutUtils module for handling node layout using elkjs * feat: update processDataFromFlow to add layout to nodes if needed * feat: Update flowsManagerStore to parse flow data from file before processing - Replace usages of `fileData` with `parsedFlowData` for improved clarity - Ensure compatibility with newProject and isComponent parameters - Improve error handling for uploading components as flows or vice versa - Refactor code for better readability and maintainability * Refactor import paths to use 'initialize' module in 'base.py' * feat: Add method to set class source code and integrate it with frontend node input field * refactor: Update sourceHandle dataType to use custom component class name * fix: Raise error for unknown vertex types instead of returning default Vertex class * refactor: Remove redundant call to _import_vertex_types() in VertexTypesDict initialization * refactor: Simplify add_code_field by removing unnecessary field_config parameter from function signature * feat: Add elkjs dependency to package.json and package-lock.json for enhanced functionality in the frontend * refactor: Update fields type in Template class to use InputTypes for improved type safety * refactor: Update import path for DefaultPromptField to improve code organization and maintain compatibility * refactor: Reorganize imports in __init__.py for better structure and consistency across the inputs module * refactor: Clean up imports in types.py for better organization and consistency in the graph vertex module * refactor: Change vertex type annotations to strings 
for better compatibility and consistency in the graph module methods * refactor: Update component instantiation to include _code parameter and fix input type annotations for improved type handling * refactor: Remove unused CustomComponent import from __init__.py for cleaner module structure and improved organization * refactor: Modify custom_component instantiation to include _code argument for enhanced functionality and clarity in CodeParser class * refactor: Update CustomComponent import in __init__.py for improved module structure and organization * refactor: Update launch.json to include correct path for backend source files * refactor: Update dependencies in poetry.lock to latest versions and resolve merge conflicts in backend files * [autofix.ci] apply automated fixes * refactor: Remove unnecessary line in test_memory_chatbot.py * refactor: Update dataType assignment in Component class to use component name if available, or fallback to class name * refactor: Correct flow_id reference in MemoryComponent to improve clarity and consistency in memory handling * refactor: Update import path for DefaultPromptField to improve code organization and maintainability in api_utils.py * refactor: Add loading module to __init__.py for improved organization of interface package * refactor: Clean up imports in base.py and enforce edge validation in Graph class for improved maintainability and error handling * refactor: Remove edge component additions in test_base.py to streamline graph tests and emphasize error handling for unprepared graphs * refactor: Mark @clack/prompts is-unicode-supported as extraneous in package-lock.json for better dependency management * refactor: Update dataType assignment in Component class to use component name if available, or fallback to class name * refactor: Fix edge existence check in Graph class to use correct variable, ensuring accurate validation of graph structure * refactor: Add test for graph with edge and improve graph preparation 
logic * refactor: Set default node type to "genericNode" in getLayoutedNodes for consistent layout structure * create consts for node width and height * refactor: Catch and log errors when processing flow data in flowsManagerStore and reactflowUtils * [autofix.ci] apply automated fixes * fix: Validate custom components for source and target vertices in Graph edges * test: Add fixture for client and raise TypeError for invalid class parsing in CodeParser tests * test: Add unit test for listing flows as Flow objects in custom component with client * test: Update assertions for memory chatbot component types in unit tests * test: Refactor assertions to use updated component names in vector store RAG unit tests * fix: Change error handling to return default Vertex for unknown node types in graph class * [autofix.ci] apply automated fixes * test: Add pytest fixture for CustomComponent in unit tests to enhance test structure and readability * chore: Update component names in vector store RAG unit tests * test: Refactor imports and make flow name generation unique in database unit tests * chore: Add new attributes to Edge class for improved state management and validation in edge processing logic * chore: Implement addition methods for Graph class to combine vertices and edges from other graph instances safely * chore: Extend serialization in Graph class to include additional internal attributes for improved state handling * chore: Call initialize method in prepare for proper setup before validating component IDs in Graph class * chore: Add test to validate graph combination in vector store RAG, ensuring correct vertices and edges in merged graph structure * refactor: Add utility functions for getting handle IDs in CustomNodes - Added `getRightHandleId` function to generate the right handle ID for source handles. - Added `getLeftHandleId` function to generate the left handle ID for target handles. 
- These functions improve code readability and maintainability by encapsulating the logic for generating handle IDs. * refactor: Add type for escaped handle IDs in edges to improve type safety in reactflowUtils * feat: Add function to escape handle IDs in edges, enhancing edge management in reactflowUtils * feat: Add function to check edges without escaped handle IDs, improving edge validation in reactflowUtils * feat: Enhance edge processing in reactflowUtils to handle edges without escaped handle IDs more effectively * feat: Add layoutUtils module for handling node layout using elkjs * feat: update processDataFromFlow to add layout to nodes if needed * Refactor import paths to use 'initialize' module in 'base.py' * feat: Add method to set class source code and integrate it with frontend node input field * refactor: Update sourceHandle dataType to use custom component class name * fix: Raise error for unknown vertex types instead of returning default Vertex class * refactor: Remove redundant call to _import_vertex_types() in VertexTypesDict initialization * refactor: Simplify add_code_field by removing unnecessary field_config parameter from function signature * feat: Add elkjs dependency to package.json and package-lock.json for enhanced functionality in the frontend * refactor: Update fields type in Template class to use InputTypes for improved type safety * refactor: Reorganize imports in __init__.py for better structure and consistency across the inputs module * refactor: Clean up imports in types.py for better organization and consistency in the graph vertex module * refactor: Change vertex type annotations to strings for better compatibility and consistency in the graph module methods * refactor: Update component instantiation to include _code parameter and fix input type annotations for improved type handling * refactor: Remove unused CustomComponent import from __init__.py for cleaner module structure and improved organization * refactor: Modify 
custom_component instantiation to include _code argument for enhanced functionality and clarity in CodeParser class * refactor: Update CustomComponent import in __init__.py for improved module structure and organization * refactor: Update launch.json to include correct path for backend source files * refactor: Update dependencies in poetry.lock to latest versions and resolve merge conflicts in backend files * refactor: Update dataType assignment in Component class to use component name if available, or fallback to class name * refactor: Correct flow_id reference in MemoryComponent to improve clarity and consistency in memory handling * refactor: Update import path for DefaultPromptField to improve code organization and maintainability in api_utils.py * refactor: Add loading module to __init__.py for improved organization of interface package * refactor: Clean up imports in base.py and enforce edge validation in Graph class for improved maintainability and error handling * refactor: Remove edge component additions in test_base.py to streamline graph tests and emphasize error handling for unprepared graphs * refactor: Mark @clack/prompts is-unicode-supported as extraneous in package-lock.json for better dependency management * refactor: Update dataType assignment in Component class to use component name if available, or fallback to class name * refactor: Fix edge existence check in Graph class to use correct variable, ensuring accurate validation of graph structure * refactor: Add test for graph with edge and improve graph preparation logic * refactor: Set default node type to "genericNode" in getLayoutedNodes for consistent layout structure * create consts for node width and height * refactor: Catch and log errors when processing flow data in flowsManagerStore and reactflowUtils * [autofix.ci] apply automated fixes * fix: Validate custom components for source and target vertices in Graph edges * test: Add fixture for client and raise TypeError for invalid class 
parsing in CodeParser tests * test: Add unit test for listing flows as Flow objects in custom component with client * test: Update assertions for memory chatbot component types in unit tests * test: Refactor assertions to use updated component names in vector store RAG unit tests * fix: Change error handling to return default Vertex for unknown node types in graph class * [autofix.ci] apply automated fixes * test: Add pytest fixture for CustomComponent in unit tests to enhance test structure and readability * chore: Update component names in vector store RAG unit tests * test: Refactor imports and make flow name generation unique in database unit tests * chore: Remove unused upload and flow management functions from flowsManagerStore for cleaner codebase * chore: Await processDataFromFlow in useAddFlow hook * chore: Correct NODE_HEIGHT calculation to use NODE_WIDTH constant for consistency in constants file * chore: Remove extraneous flag for is-unicode-supported in package-lock.json for cleaner dependency management --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: anovazzi1 --- src/backend/base/langflow/graph/edge/base.py | 7 ++ src/backend/base/langflow/graph/graph/base.py | 33 +++++++++ .../starter_projects/test_vector_store_rag.py | 68 +++++++++++++++++++ src/frontend/package-lock.json | 1 + 4 files changed, 109 insertions(+) diff --git a/src/backend/base/langflow/graph/edge/base.py b/src/backend/base/langflow/graph/edge/base.py index f3520e72cf18..80aa5519bb6a 100644 --- a/src/backend/base/langflow/graph/edge/base.py +++ b/src/backend/base/langflow/graph/edge/base.py @@ -87,6 +87,13 @@ def __setstate__(self, state): self.target_param = state["target_param"] self.source_handle = state.get("source_handle") self.target_handle = state.get("target_handle") + self._source_handle = state.get("_source_handle") + self._target_handle = state.get("_target_handle") + self._data = state.get("_data") + 
self.valid_handles = state.get("valid_handles") + self.source_types = state.get("source_types") + self.target_reqs = state.get("target_reqs") + self.matched_type = state.get("matched_type") def validate_edge(self, source, target) -> None: # If the self.source_handle has base_classes, then we are using the legacy diff --git a/src/backend/base/langflow/graph/graph/base.py b/src/backend/base/langflow/graph/graph/base.py index 901f4288810d..f9772fe325c1 100644 --- a/src/backend/base/langflow/graph/graph/base.py +++ b/src/backend/base/langflow/graph/graph/base.py @@ -1,4 +1,5 @@ import asyncio +import copy import json import uuid from collections import defaultdict, deque @@ -100,6 +101,29 @@ def __init__( if (start is not None and end is None) or (start is None and end is not None): raise ValueError("You must provide both input and output components") + def __add__(self, other): + if not isinstance(other, Graph): + raise TypeError("Can only add Graph objects") + # Add the vertices and edges from the other graph to this graph + new_instance = copy.deepcopy(self) + for vertex in other.vertices: + # This updates the edges as well + new_instance.add_vertex(vertex) + new_instance.build_graph_maps(new_instance.edges) + new_instance.define_vertices_lists() + return new_instance + + def __iadd__(self, other): + if not isinstance(other, Graph): + raise TypeError("Can only add Graph objects") + # Add the vertices and edges from the other graph to this graph + for vertex in other.vertices: + # This updates the edges as well + self.add_vertex(vertex) + self.build_graph_maps(self.edges) + self.define_vertices_lists() + return self + def dumps( self, name: Optional[str] = None, @@ -779,6 +803,14 @@ def __getstate__(self): "vertices_to_run": self.vertices_to_run, "stop_vertex": self.stop_vertex, "vertex_map": self.vertex_map, + "_run_queue": self._run_queue, + "_first_layer": self._first_layer, + "_vertices": self._vertices, + "_edges": self._edges, + "_is_input_vertices": 
self._is_input_vertices, + "_is_output_vertices": self._is_output_vertices, + "_has_session_id_vertices": self._has_session_id_vertices, + "_sorted_vertices_layers": self._sorted_vertices_layers, } def __setstate__(self, state): @@ -1450,6 +1482,7 @@ def _create_vertex(self, frontend_data: NodeData): return vertex_instance def prepare(self, stop_component_id: Optional[str] = None, start_component_id: Optional[str] = None): + self.initialize() if stop_component_id and start_component_id: raise ValueError("You can only provide one of stop_component_id or start_component_id") self.validate_stream() diff --git a/src/backend/tests/unit/initial_setup/starter_projects/test_vector_store_rag.py b/src/backend/tests/unit/initial_setup/starter_projects/test_vector_store_rag.py index a005b97deeb7..587c65779a2d 100644 --- a/src/backend/tests/unit/initial_setup/starter_projects/test_vector_store_rag.py +++ b/src/backend/tests/unit/initial_setup/starter_projects/test_vector_store_rag.py @@ -1,3 +1,4 @@ +import copy from textwrap import dedent import pytest @@ -211,6 +212,73 @@ def test_vector_store_rag_dump_components_and_edges(ingestion_graph, rag_graph): assert (source, target) in expected_rag_edges, f"Edge {source} -> {target} not found" +def test_vector_store_rag_add(ingestion_graph, rag_graph): + ingestion_graph_copy = copy.deepcopy(ingestion_graph) + rag_graph_copy = copy.deepcopy(rag_graph) + ingestion_graph_copy += rag_graph_copy + + assert ( + len(ingestion_graph_copy.vertices) == len(ingestion_graph.vertices) + len(rag_graph.vertices) + ), f"Vertices mismatch: {len(ingestion_graph_copy.vertices)} != {len(ingestion_graph.vertices)} + {len(rag_graph.vertices)}" + assert len(ingestion_graph_copy.edges) == len(ingestion_graph.edges) + len( + rag_graph.edges + ), f"Edges mismatch: {len(ingestion_graph_copy.edges)} != {len(ingestion_graph.edges)} + {len(rag_graph.edges)}" + + combined_graph_dump = ingestion_graph_copy.dump( + name="Combined Graph", description="Graph for data 
ingestion and RAG", endpoint_name="combined" + ) + + combined_data = combined_graph_dump["data"] + combined_nodes = combined_data["nodes"] + combined_edges = combined_data["edges"] + + # Sort nodes by id to check components + combined_nodes = sorted(combined_nodes, key=lambda x: x["id"]) + + # Expected components in the combined graph (both ingestion and RAG nodes) + expected_nodes = sorted( + [ + {"id": "file-123", "type": "File"}, + {"id": "openai-embeddings-123", "type": "OpenAIEmbeddings"}, + {"id": "text-splitter-123", "type": "SplitText"}, + {"id": "vector-store-123", "type": "AstraDB"}, + {"id": "chatinput-123", "type": "ChatInput"}, + {"id": "chatoutput-123", "type": "ChatOutput"}, + {"id": "openai-123", "type": "OpenAIModel"}, + {"id": "openai-embeddings-124", "type": "OpenAIEmbeddings"}, + {"id": "parse-data-123", "type": "ParseData"}, + {"id": "prompt-123", "type": "Prompt"}, + {"id": "rag-vector-store-123", "type": "AstraDB"}, + ], + key=lambda x: x["id"], + ) + + for expected_node, combined_node in zip(expected_nodes, combined_nodes): + assert combined_node["data"]["type"] == expected_node["type"] + assert combined_node["id"] == expected_node["id"] + + # Expected edges in the combined graph (both ingestion and RAG edges) + expected_combined_edges = [ + ("file-123", "text-splitter-123"), + ("text-splitter-123", "vector-store-123"), + ("openai-embeddings-123", "vector-store-123"), + ("chatinput-123", "rag-vector-store-123"), + ("openai-embeddings-124", "rag-vector-store-123"), + ("chatinput-123", "prompt-123"), + ("rag-vector-store-123", "parse-data-123"), + ("parse-data-123", "prompt-123"), + ("prompt-123", "openai-123"), + ("openai-123", "chatoutput-123"), + ] + + assert len(combined_edges) == len(expected_combined_edges), combined_edges + + for edge in combined_edges: + source = edge["source"] + target = edge["target"] + assert (source, target) in expected_combined_edges, f"Edge {source} -> {target} not found" + + def 
test_vector_store_rag_dump(ingestion_graph, rag_graph): # Test ingestion graph dump ingestion_graph_dump = ingestion_graph.dump( diff --git a/src/frontend/package-lock.json b/src/frontend/package-lock.json index db0e15228cd9..f150e6b429da 100644 --- a/src/frontend/package-lock.json +++ b/src/frontend/package-lock.json @@ -1079,6 +1079,7 @@ }, "node_modules/@clack/prompts/node_modules/is-unicode-supported": { "version": "1.3.0", + "extraneous": true, "inBundle": true, "license": "MIT", "engines": {