From 7eda663fab14f620edd671723486931aa1957cee Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 12:03:42 -0300 Subject: [PATCH 01/23] feat: Implement serialization functions for various data types and add a unified serialize method --- .../base/langflow/serialization/__init__.py | 3 + .../langflow/serialization/serialization.py | 165 ++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 src/backend/base/langflow/serialization/__init__.py create mode 100644 src/backend/base/langflow/serialization/serialization.py diff --git a/src/backend/base/langflow/serialization/__init__.py b/src/backend/base/langflow/serialization/__init__.py new file mode 100644 index 000000000000..d277ccd8a499 --- /dev/null +++ b/src/backend/base/langflow/serialization/__init__.py @@ -0,0 +1,3 @@ +from .serialization import serialize + +__all__ = ["serialize"] diff --git a/src/backend/base/langflow/serialization/serialization.py b/src/backend/base/langflow/serialization/serialization.py new file mode 100644 index 000000000000..05753e9bebef --- /dev/null +++ b/src/backend/base/langflow/serialization/serialization.py @@ -0,0 +1,165 @@ +from collections.abc import AsyncIterator, Generator, Iterator +from datetime import datetime, timezone +from decimal import Decimal +from typing import Any +from uuid import UUID + +from langchain_core.documents import Document +from loguru import logger +from pydantic import BaseModel +from pydantic.v1 import BaseModel as BaseModelV1 + +from langflow.utils.constants import MAX_ITEMS_LENGTH, MAX_TEXT_LENGTH + + +def _serialize_str(obj: str, max_length: int | None, _) -> str: + """Truncate long strings with ellipsis if max_length provided.""" + if max_length is None: + return obj + return obj[:max_length] + "..." if len(obj) > max_length else obj + + +def _serialize_bytes(obj: bytes, max_length: int | None, _) -> str: + """Decode bytes to string and truncate if max_length provided.""" + decoded = obj.decode("utf-8", errors="ignore") + if max_length is None: + return decoded + return decoded[:max_length] + "..." if len(decoded) > max_length else decoded + + +def _serialize_datetime(obj: datetime, *_) -> str: + """Convert datetime to UTC ISO format.""" + return obj.replace(tzinfo=timezone.utc).isoformat() + + +def _serialize_decimal(obj: Decimal, *_) -> float: + """Convert Decimal to float.""" + return float(obj) + + +def _serialize_uuid(obj: UUID, *_) -> str: + """Convert UUID to string.""" + return str(obj) + + +def _serialize_document(obj: Document, max_length: int | None, max_items: int | None) -> Any: + """Serialize Langchain Document recursively.""" + return serialize(obj.to_json(), max_length, max_items) + + +def _serialize_iterator(_: AsyncIterator | Generator | Iterator, *__) -> str: + """Handle unconsumed iterators uniformly.""" + return "Unconsumed Stream" + + +def _serialize_pydantic(obj: BaseModel, max_length: int | None, max_items: int | None) -> Any: + """Handle modern Pydantic models.""" + serialized = obj.model_dump() + return {k: serialize(v, max_length, max_items) for k, v in serialized.items()} + + +def _serialize_pydantic_v1(obj: BaseModelV1, max_length: int | None, max_items: int | None) -> Any: + """Backwards-compatible handling for Pydantic v1 models.""" + if hasattr(obj, "to_json"): + return serialize(obj.to_json(), max_length, max_items) + return serialize(obj.dict(), max_length, max_items) + + +def _serialize_dict(obj: dict, max_length: int | None, max_items: int | None) -> dict: + """Recursively process dictionary values.""" + return {k: serialize(v, max_length, max_items) for k, v in obj.items()} + + +def _serialize_list_tuple(obj: list | tuple, max_length: int | None, max_items: int | None) -> list: + """Truncate long lists and process items recursively.""" + if max_items is not None and len(obj) > max_items: + truncated = list(obj)[:max_items] + truncated.append(f"... [truncated {len(obj) - max_items} items]") + obj = truncated + return [serialize(item, max_length, max_items) for item in obj] + + +def _serialize_dispatcher(obj: Any, max_length: int | None, max_items: int | None) -> Any | None: + """Dispatch object to appropriate serializer.""" + match obj: + case str(): + return _serialize_str(obj, max_length, max_items) + case bytes(): + return _serialize_bytes(obj, max_length, max_items) + case datetime(): + return _serialize_datetime(obj, max_length, max_items) + case Decimal(): + return _serialize_decimal(obj, max_length, max_items) + case UUID(): + return _serialize_uuid(obj, max_length, max_items) + case Document(): + return _serialize_document(obj, max_length, max_items) + case AsyncIterator() | Generator() | Iterator(): + return _serialize_iterator(obj, max_length, max_items) + case BaseModel(): + return _serialize_pydantic(obj, max_length, max_items) + case BaseModelV1(): + return _serialize_pydantic_v1(obj, max_length, max_items) + case dict(): + return _serialize_dict(obj, max_length, max_items) + case list() | tuple(): + return _serialize_list_tuple(obj, max_length, max_items) + case _: + return None + + +def serialize( + obj: Any, + max_length: int | None = MAX_TEXT_LENGTH, + max_items: int | None = MAX_ITEMS_LENGTH, + *, + to_str: bool = False, +) -> Any: + """Unified serialization with optional truncation support. + + Coordinates specialized serializers through a dispatcher pattern. + Maintains recursive processing for nested structures. + + Args: + obj: Object to serialize + max_length: Maximum length for string values, None for no truncation + max_items: Maximum items in list-like structures, None for no truncation + to_str: If True, return a string representation of the object if serialization fails + """ + try: + # First try type-specific serialization + result = _serialize_dispatcher(obj, max_length, max_items) + if result is not None: + return result + + # Handle class-based Pydantic types + if isinstance(obj, type) and issubclass(obj, BaseModel | BaseModelV1): + return repr(obj) + + # Fallback to common serialization patterns + if hasattr(obj, "model_dump"): + return serialize(obj.model_dump(), max_length, max_items) + if hasattr(obj, "dict") and not isinstance(obj, type): + return serialize(obj.dict(), max_length, max_items) + + # Final fallback to string conversion + if to_str: + return str(obj) + + except Exception: # noqa: BLE001 + logger.debug(f"Cannot serialize object {obj}") + return "[Unserializable Object]" + return obj + + +def serialize_or_str( + obj: Any, max_length: int | None = MAX_TEXT_LENGTH, max_items: int | None = MAX_ITEMS_LENGTH +) -> Any: + """Calls serialize() and if it fails, returns a string representation of the object. + + Args: + obj: Object to serialize + max_length: Maximum length for string values, None for no truncation + max_items: Maximum items in list-like structures, None for no truncation + """ + return serialize(obj, max_length, max_items, to_str=True) From 1045c7ab7d1f9266af3931f01841368464e3fe4b Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 13:05:13 -0300 Subject: [PATCH 02/23] feat: Enhance serialization by adding support for primitive types, enums, and generic types --- .../langflow/serialization/serialization.py | 46 ++++++++++++++++--- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/src/backend/base/langflow/serialization/serialization.py b/src/backend/base/langflow/serialization/serialization.py index 05753e9bebef..c69f0d4e013f 100644 --- a/src/backend/base/langflow/serialization/serialization.py +++ b/src/backend/base/langflow/serialization/serialization.py @@ -79,8 +79,22 @@ def _serialize_list_tuple(obj: list | tuple, max_length: int | None, max_items: return [serialize(item, max_length, max_items) for item in obj] +def _serialize_primitive(obj: Any, *_) -> Any: + """Handle primitive types without conversion.""" + if obj is None or isinstance(obj, int | float | bool): + return obj + return None + + def _serialize_dispatcher(obj: Any, max_length: int | None, max_items: int | None) -> Any | None: """Dispatch object to appropriate serializer.""" + # Handle primitive types first + if obj is None: + return obj + primitive = _serialize_primitive(obj, max_length, max_items) + if primitive is not None: # Special check for None since it's a valid primitive + return primitive + match obj: case str(): return _serialize_str(obj, max_length, max_items) @@ -105,6 +119,15 @@ def _serialize_dispatcher(obj: Any, max_length: int | None, max_items: int | Non case list() | tuple(): return _serialize_list_tuple(obj, max_length, max_items) case _: + # Handle enums + if hasattr(obj, "_name_"): # Enum check + return f"{obj.__class__.__name__}.{obj._name_}" + # Handle TypeVars + if hasattr(obj, "__name__") and hasattr(obj, "__bound__"): + return repr(obj) + # Handle type aliases and generic types + if hasattr(obj, "__origin__") or hasattr(obj, "__parameters__"): + return repr(obj) return None @@ -129,12 +152,21 @@ def serialize( try: # First try type-specific serialization result = _serialize_dispatcher(obj, max_length, max_items) - if result is not None: + if result is not None or obj is None: # Special check for None since it's a valid result return result - # Handle class-based Pydantic types - if isinstance(obj, type) and issubclass(obj, BaseModel | BaseModelV1): - return repr(obj) + # Handle class-based Pydantic types and other types + if isinstance(obj, type): + if issubclass(obj, BaseModel | BaseModelV1): + return repr(obj) + return str(obj) # Handle other class types + + # Handle type aliases and generic types + if hasattr(obj, "__origin__") or hasattr(obj, "__parameters__"): # Type alias or generic type check + try: + return repr(obj) + except Exception as e: # noqa: BLE001 + logger.debug(f"Cannot serialize object {obj}: {e!s}") # Fallback to common serialization patterns if hasattr(obj, "model_dump"): @@ -143,11 +175,11 @@ def serialize( return serialize(obj.dict(), max_length, max_items) # Final fallback to string conversion - if to_str: + if to_str or not isinstance(obj, type): # Convert instances to string return str(obj) - except Exception: # noqa: BLE001 - logger.debug(f"Cannot serialize object {obj}") + except Exception as e: # noqa: BLE001 + logger.debug(f"Cannot serialize object {obj}: {e!s}") return "[Unserializable Object]" return obj From 42bd591dffc5d6e88bc20ea38a5d0651eff09aaa Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 13:05:26 -0300 Subject: [PATCH 03/23] fix: Update Pinecone integration to use VectorStore and handle import errors gracefully --- .../langflow/components/vectorstores/pinecone.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/backend/base/langflow/components/vectorstores/pinecone.py b/src/backend/base/langflow/components/vectorstores/pinecone.py index 8ab592c32951..f71a2aed24fd 100644 --- a/src/backend/base/langflow/components/vectorstores/pinecone.py +++ b/src/backend/base/langflow/components/vectorstores/pinecone.py @@ -1,5 +1,5 @@ import numpy as np -from langchain_pinecone import Pinecone +from langchain_core.vectorstores import VectorStore from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.helpers.data import docs_to_data @@ -42,8 +42,14 @@ class PineconeVectorStoreComponent(LCVectorStoreComponent): ] @check_cached_vector_store - def build_vector_store(self) -> Pinecone: + def build_vector_store(self) -> VectorStore: """Build and return a Pinecone vector store instance.""" + try: + from langchain_pinecone import PineconeVectorStore + except ImportError as e: + msg = "langchain-pinecone is not installed. Please install it with `pip install langchain-pinecone`." + raise ValueError(msg) from e + try: from langchain_pinecone._utilities import DistanceStrategy @@ -55,7 +61,7 @@ def build_vector_store(self) -> Pinecone: distance_strategy = DistanceStrategy[distance_strategy] # Initialize Pinecone instance with wrapped embeddings - pinecone = Pinecone( + pinecone = PineconeVectorStore( index_name=self.index_name, embedding=wrapped_embeddings, # Use wrapped embeddings text_key=self.text_key, From 6cb69833e071ebd4e9407ddbfa2a61222a35ed08 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 13:05:49 -0300 Subject: [PATCH 04/23] test: Add hypothesis-based tests for serialization functions across various data types --- .../tests/unit/serialization/__init__.py | 0 .../unit/serialization/test_serialization.py | 246 ++++++++++++++++++ 2 files changed, 246 insertions(+) create mode 100644 src/backend/tests/unit/serialization/__init__.py create mode 100644 src/backend/tests/unit/serialization/test_serialization.py diff --git a/src/backend/tests/unit/serialization/__init__.py b/src/backend/tests/unit/serialization/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/src/backend/tests/unit/serialization/test_serialization.py b/src/backend/tests/unit/serialization/test_serialization.py new file mode 100644 index 000000000000..f79650bff73b --- /dev/null +++ b/src/backend/tests/unit/serialization/test_serialization.py @@ -0,0 +1,246 @@ +import math +from datetime import datetime, timezone +from typing import Any + +import pytest +from hypothesis import given, settings +from hypothesis import strategies as st +from langchain_core.documents import Document +from langflow.serialization.serialization import serialize, serialize_or_str +from langflow.utils.constants import MAX_ITEMS_LENGTH, MAX_TEXT_LENGTH +from pydantic import BaseModel as PydanticBaseModel +from pydantic.v1 import BaseModel as PydanticV1BaseModel + +# Comprehensive hypothesis strategies +text_strategy = st.text(min_size=0, max_size=MAX_TEXT_LENGTH * 3) +bytes_strategy = st.binary(min_size=0, max_size=MAX_TEXT_LENGTH * 3) +datetime_strategy = st.datetimes( + min_value=datetime.min, max_value=datetime.max, timezones=st.sampled_from([timezone.utc, None]) +) +decimal_strategy = st.decimals(min_value=-1e6, max_value=1e6, allow_nan=False, allow_infinity=False, places=10) +uuid_strategy = st.uuids() +list_strategy = st.lists(st.one_of(st.integers(), st.text(), st.floats()), min_size=0, max_size=MAX_ITEMS_LENGTH * 3) +dict_strategy = st.dictionaries( + keys=st.text(min_size=1), + values=st.one_of(st.integers(), st.floats(), st.text(), st.booleans(), st.none()), + min_size=0, + max_size=MAX_ITEMS_LENGTH, +) + +# Complex nested structure strategy +nested_strategy = st.recursive( + st.one_of(st.integers(), st.floats(), st.text(), st.booleans()), + lambda children: st.lists(children) | st.dictionaries(st.text(), children), + max_leaves=10, +) + + +# Pydantic models for testing +class ModernModel(PydanticBaseModel): + name: str + value: int + + +class LegacyModel(PydanticV1BaseModel): + name: str + value: int + + +@pytest.mark.unit +class TestSerializationHypothesis: + """Hypothesis-based property tests for serialization logic.""" + + @settings(max_examples=100) + @given(text=text_strategy) + def test_string_serialization(self, text: str) -> None: + result: str = serialize(text) + if len(text) > MAX_TEXT_LENGTH: + expected: str = text[:MAX_TEXT_LENGTH] + "..." + assert result == expected + else: + assert result == text + + @settings(max_examples=100) + @given(data=bytes_strategy) + def test_bytes_serialization(self, data: bytes) -> None: + result: str = serialize(data) + decoded: str = data.decode("utf-8", errors="ignore") + if len(decoded) > MAX_TEXT_LENGTH: + expected: str = decoded[:MAX_TEXT_LENGTH] + "..." + assert result == expected + else: + assert result == decoded + + @settings(max_examples=100) + @given(dt=datetime_strategy) + def test_datetime_serialization(self, dt: datetime) -> None: + result: str = serialize(dt) + assert result == dt.replace(tzinfo=timezone.utc).isoformat() + + @settings(max_examples=100) + @given(dec=decimal_strategy) + def test_decimal_serialization(self, dec) -> None: + result: float = serialize(dec) + assert result == float(dec) + + @settings(max_examples=100) + @given(uid=uuid_strategy) + def test_uuid_serialization(self, uid) -> None: + result: str = serialize(uid) + assert result == str(uid) + + @settings(max_examples=100) + @given(lst=list_strategy) + def test_list_truncation(self, lst: list) -> None: + result: list = serialize(lst) + if len(lst) > MAX_ITEMS_LENGTH: + assert len(result) == MAX_ITEMS_LENGTH + 1 + assert f"... [truncated {len(lst) - MAX_ITEMS_LENGTH} items]" in result + else: + assert result == lst + + @settings(max_examples=100) + @given(dct=dict_strategy) + def test_dict_serialization(self, dct: dict) -> None: + result: dict = serialize(dct) + assert isinstance(result, dict) + for k, v in result.items(): + assert isinstance(k, str) + assert isinstance(v, int | float | str | bool | type(None)) + + @settings(max_examples=100) + @given(value=st.integers()) + def test_pydantic_modern_model(self, value: int) -> None: + model: ModernModel = ModernModel(name="test", value=value) + result: dict = serialize(model) + assert result == {"name": "test", "value": value} + + @settings(max_examples=100) + @given(value=st.integers()) + def test_pydantic_v1_model(self, value: int) -> None: + model: LegacyModel = LegacyModel(name="test", value=value) + result: dict = serialize(model) + assert result == {"name": "test", "value": value} + + def test_async_iterator_handling(self) -> None: + async def async_gen(): + yield 1 + yield 2 + + gen = async_gen() + result: str = serialize(gen) + assert result == "Unconsumed Stream" + + @settings(max_examples=100) + @given(data=st.one_of(st.integers(), st.floats(allow_nan=True), st.booleans(), st.none())) + def test_primitive_types(self, data: float | bool | None) -> None: + result: int | float | bool | None = serialize(data) + if isinstance(data, float) and math.isnan(data) and isinstance(result, float): + assert math.isnan(result) + else: + assert result == data + + @settings(max_examples=100) + @given(nested=nested_strategy) + def test_nested_structures(self, nested: Any) -> None: + result: list | dict | int | float | str | bool = serialize(nested) + assert isinstance(result, list | dict | int | float | str | bool) + + @settings(max_examples=100) + @given(text=text_strategy) + def test_max_length_none(self, text: str) -> None: + result: str = serialize(text, max_length=None) + assert result == text + + @settings(max_examples=100) + @given(lst=list_strategy) + def test_max_items_none(self, lst: list) -> None: + result: list = serialize(lst, max_items=None) + assert result == lst + + @settings(max_examples=100) + @given(obj=st.builds(object)) + def test_fallback_serialization(self, obj: object) -> None: + result: str = serialize_or_str(obj) + assert isinstance(result, str) + assert str(obj) in result + + def test_document_serialization(self) -> None: + doc: Document = Document(page_content="test", metadata={"source": "test"}) + result: dict = serialize(doc) + assert isinstance(result, dict) + assert "kwargs" in result + assert "page_content" in result["kwargs"] + assert result["kwargs"]["page_content"] == "test" + assert "metadata" in result["kwargs"] + assert result["kwargs"]["metadata"] == {"source": "test"} + + def test_class_serialization(self) -> None: + class TestClass: + def __init__(self, value: Any) -> None: + self.value = value + + result: str = serialize(TestClass) + assert result == str(TestClass) + + def test_instance_serialization(self) -> None: + class TestClass: + def __init__(self, value: int) -> None: + self.value = value + + instance: TestClass = TestClass(42) + result: str = serialize(instance) + assert result == str(instance) + + def test_pydantic_class_serialization(self) -> None: + result: str = serialize(ModernModel) + assert result == repr(ModernModel) + + def test_builtin_type_serialization(self) -> None: + result: str = serialize(int) + assert result == repr(int) + + def test_none_serialization(self) -> None: + result: None = serialize(None) + assert result is None + + def test_custom_type_serialization(self) -> None: + from typing import TypeVar + + T = TypeVar("T") + result: str = serialize(T) + assert result == repr(T) + + def test_nested_class_serialization(self) -> None: + class Outer: + class Inner: + pass + + result: str = serialize(Outer.Inner) + assert result == str(Outer.Inner) + + def test_enum_serialization(self) -> None: + from enum import Enum + + class TestEnum(Enum): + A = 1 + B = 2 + + result: str = serialize(TestEnum.A) + assert result == "TestEnum.A" + + def test_type_alias_serialization(self) -> None: + IntList = list[int] # noqa: N806 + result: str = serialize(IntList) + assert result == repr(IntList) + + def test_generic_type_serialization(self) -> None: + from typing import Generic, TypeVar + + T = TypeVar("T") + + class Box(Generic[T]): + pass + + result: str = serialize(Box[int]) + assert result == repr(Box[int]) From 9767766632bd71fb788e79a74d27bdc0d2ad5a66 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 13:06:05 -0300 Subject: [PATCH 05/23] refactor: Replace custom serialization logic with unified serialize function for consistency and maintainability --- src/backend/base/langflow/api/v1/schemas.py | 62 +++---------------- .../tests/unit/api/v1/test_api_schemas.py | 47 +++++++------- 2 files changed, 32 insertions(+), 77 deletions(-) diff --git a/src/backend/base/langflow/api/v1/schemas.py b/src/backend/base/langflow/api/v1/schemas.py index 49021c436da5..ffecb8cbc4ee 100644 --- a/src/backend/base/langflow/api/v1/schemas.py +++ b/src/backend/base/langflow/api/v1/schemas.py @@ -1,5 +1,4 @@ from datetime import datetime, timezone -from decimal import Decimal from enum import Enum from pathlib import Path from typing import Any @@ -11,13 +10,14 @@ from langflow.schema import dotdict from langflow.schema.graph import Tweaks from langflow.schema.schema import InputType, OutputType, OutputValue +from langflow.serialization.serialization import serialize from langflow.services.database.models.api_key.model import ApiKeyRead from langflow.services.database.models.base import orjson_dumps from langflow.services.database.models.flow import FlowCreate, FlowRead from langflow.services.database.models.user import UserRead from langflow.services.settings.feature_flags import FeatureFlags from langflow.services.tracing.schema import Log -from langflow.utils.constants import MAX_ITEMS_LENGTH, MAX_TEXT_LENGTH +from langflow.utils.constants import MAX_TEXT_LENGTH from langflow.utils.util_strings import truncate_long_strings @@ -271,64 +271,18 @@ class ResultDataResponse(BaseModel): def serialize_results(cls, v): """Serialize results with custom handling for special types and truncation.""" if isinstance(v, dict): - return {key: cls._serialize_and_truncate(val, max_length=MAX_TEXT_LENGTH) for key, val in v.items()} - return cls._serialize_and_truncate(v, max_length=MAX_TEXT_LENGTH) - - @staticmethod - def _serialize_and_truncate(obj: Any, max_length: int = MAX_TEXT_LENGTH) -> Any: - """Helper method to serialize and truncate values.""" - if isinstance(obj, bytes): - obj = obj.decode("utf-8", errors="ignore") - if len(obj) > max_length: - return f"{obj[:max_length]}... [truncated]" - return obj - if isinstance(obj, str): - if len(obj) > max_length: - return f"{obj[:max_length]}... [truncated]" - return obj - if isinstance(obj, datetime): - return obj.replace(tzinfo=timezone.utc).isoformat() - if isinstance(obj, Decimal): - return float(obj) - if isinstance(obj, UUID): - return str(obj) - if isinstance(obj, OutputValue | Log): - # First serialize the model - serialized = obj.model_dump() - # Then recursively truncate all values in the serialized dict - for key, value in serialized.items(): - # Handle string values directly to ensure proper truncation - if isinstance(value, str) and len(value) > max_length: - serialized[key] = f"{value[:max_length]}... [truncated]" - else: - serialized[key] = ResultDataResponse._serialize_and_truncate(value, max_length=max_length) - return serialized - if isinstance(obj, BaseModel): - # For other BaseModel instances, serialize all fields - serialized = obj.model_dump() - return { - k: ResultDataResponse._serialize_and_truncate(v, max_length=max_length) for k, v in serialized.items() - } - if isinstance(obj, dict): - return {k: ResultDataResponse._serialize_and_truncate(v, max_length=max_length) for k, v in obj.items()} - if isinstance(obj, list | tuple): - # If list is too long, truncate it - if len(obj) > MAX_ITEMS_LENGTH: - truncated_list = list(obj)[:MAX_ITEMS_LENGTH] - truncated_list.append(f"... [truncated {len(obj) - MAX_ITEMS_LENGTH} items]") - obj = truncated_list - return [ResultDataResponse._serialize_and_truncate(item, max_length=max_length) for item in obj] - return obj + return {key: serialize(val, max_length=MAX_TEXT_LENGTH) for key, val in v.items()} + return serialize(v, max_length=MAX_TEXT_LENGTH) @model_serializer(mode="plain") def serialize_model(self) -> dict: """Custom serializer for the entire model.""" return { "results": self.serialize_results(self.results), - "outputs": self._serialize_and_truncate(self.outputs, max_length=MAX_TEXT_LENGTH), - "logs": self._serialize_and_truncate(self.logs, max_length=MAX_TEXT_LENGTH), - "message": self._serialize_and_truncate(self.message, max_length=MAX_TEXT_LENGTH), - "artifacts": self._serialize_and_truncate(self.artifacts, max_length=MAX_TEXT_LENGTH), + "outputs": serialize(self.outputs, max_length=MAX_TEXT_LENGTH), + "logs": serialize(self.logs, max_length=MAX_TEXT_LENGTH), + "message": serialize(self.message, max_length=MAX_TEXT_LENGTH), + "artifacts": serialize(self.artifacts, max_length=MAX_TEXT_LENGTH), "timedelta": self.timedelta, "duration": self.duration, "used_frozen_result": self.used_frozen_result, diff --git a/src/backend/tests/unit/api/v1/test_api_schemas.py b/src/backend/tests/unit/api/v1/test_api_schemas.py index 26844eb6ccd5..2a73afe2290b 100644 --- a/src/backend/tests/unit/api/v1/test_api_schemas.py +++ b/src/backend/tests/unit/api/v1/test_api_schemas.py @@ -4,6 +4,7 @@ from hypothesis import strategies as st from langflow.api.v1.schemas import ResultDataResponse, VertexBuildResponse from langflow.schema.schema import OutputValue +from langflow.serialization import serialize from langflow.services.tracing.schema import Log from pydantic import BaseModel @@ -26,9 +27,9 @@ def test_result_data_response_truncation(long_string): ) response.serialize_model() - truncated = response._serialize_and_truncate(long_string, max_length=TEST_TEXT_LENGTH) - assert len(truncated) <= TEST_TEXT_LENGTH + len("... [truncated]") - assert "... [truncated]" in truncated + truncated = serialize(long_string, max_length=TEST_TEXT_LENGTH) + assert len(truncated) <= TEST_TEXT_LENGTH + len("...") + assert "..." in truncated @given( @@ -77,20 +78,20 @@ def test_result_data_response_nested_structures(long_list, long_dict): "dict": long_dict, } - response = ResultDataResponse(results=nested_data) - serialized = response._serialize_and_truncate(nested_data, max_length=TEST_TEXT_LENGTH) + ResultDataResponse(results=nested_data) + serialized = serialize(nested_data, max_length=TEST_TEXT_LENGTH) # Check list items for item in serialized["list"]: - assert len(item) <= TEST_TEXT_LENGTH + len("... [truncated]") + assert len(item) <= TEST_TEXT_LENGTH + len("...") if len(item) > TEST_TEXT_LENGTH: - assert "... [truncated]" in item + assert "..." in item # Check dict values for val in serialized["dict"].values(): - assert len(val) <= TEST_TEXT_LENGTH + len("... [truncated]") + assert len(val) <= TEST_TEXT_LENGTH + len("...") if len(val) > TEST_TEXT_LENGTH: - assert "... [truncated]" in val + assert "..." in val @given( @@ -114,7 +115,7 @@ def test_result_data_response_outputs(outputs_dict): outputs = {key: OutputValue(type="text", message=value) for key, value in outputs_dict.items()} response = ResultDataResponse(outputs=outputs) - serialized = ResultDataResponse._serialize_and_truncate(response, max_length=TEST_TEXT_LENGTH) + serialized = serialize(response, max_length=TEST_TEXT_LENGTH) # Check outputs are properly serialized and truncated for key, value in outputs_dict.items(): @@ -124,9 +125,9 @@ def test_result_data_response_outputs(outputs_dict): # Check message truncation message = serialized_output["message"] - assert len(message) <= TEST_TEXT_LENGTH + len("... [truncated]"), f"Message length: {len(message)}" + assert len(message) <= TEST_TEXT_LENGTH + len("..."), f"Message length: {len(message)}" if len(value) > TEST_TEXT_LENGTH: - assert "... [truncated]" in message + assert "..." in message assert message.startswith(value[:TEST_TEXT_LENGTH]) else: assert message == value @@ -158,7 +159,7 @@ def test_result_data_response_logs(log_messages): } response = ResultDataResponse(logs=logs) - serialized = ResultDataResponse._serialize_and_truncate(response, max_length=TEST_TEXT_LENGTH) + serialized = serialize(response, max_length=TEST_TEXT_LENGTH) # Check logs are properly serialized and truncated assert "test_node" in serialized["logs"] @@ -171,9 +172,9 @@ def test_result_data_response_logs(log_messages): # Check message truncation message = serialized_log["message"] - assert len(message) <= TEST_TEXT_LENGTH + len("... [truncated]") + assert len(message) <= TEST_TEXT_LENGTH + len("...") if len(log_msg) > TEST_TEXT_LENGTH: - assert "... [truncated]" in message + assert "..." in message assert message.startswith(log_msg[:TEST_TEXT_LENGTH]) else: assert message == log_msg @@ -225,7 +226,7 @@ def test_result_data_response_combined_fields(outputs_dict, log_messages): message={"text": "test"}, artifacts={"file": "test.txt"}, ) - serialized = ResultDataResponse._serialize_and_truncate(response, max_length=TEST_TEXT_LENGTH) + serialized = serialize(response, max_length=TEST_TEXT_LENGTH) # Check all fields are present assert "outputs" in serialized @@ -243,8 +244,8 @@ def test_result_data_response_combined_fields(outputs_dict, log_messages): # Check message truncation message = serialized_output["message"] if len(value) > TEST_TEXT_LENGTH: - assert len(message) <= TEST_TEXT_LENGTH + len("... [truncated]") - assert "... [truncated]" in message + assert len(message) <= TEST_TEXT_LENGTH + len("...") + assert "..." in message else: assert message == value @@ -260,8 +261,8 @@ def test_result_data_response_combined_fields(outputs_dict, log_messages): # Check message truncation message = serialized_log["message"] if len(log_msg) > TEST_TEXT_LENGTH: - assert len(message) <= TEST_TEXT_LENGTH + len("... [truncated]") - assert "... [truncated]" in message + assert len(message) <= TEST_TEXT_LENGTH + len("...") + assert "..." in message else: assert message == log_msg @@ -311,6 +312,6 @@ def test_vertex_build_response_with_long_data(long_string): ) response.model_dump() - truncated = result_data._serialize_and_truncate(long_string, max_length=TEST_TEXT_LENGTH) - assert len(truncated) <= TEST_TEXT_LENGTH + len("... [truncated]") - assert "... [truncated]" in truncated + truncated = serialize(long_string, max_length=TEST_TEXT_LENGTH) + assert len(truncated) <= TEST_TEXT_LENGTH + len("...") + assert "..." in truncated From b20e0f9a4422f53b0f1ae990114f6aa5d4aa159d Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 13:06:49 -0300 Subject: [PATCH 06/23] refactor: Replace recursive serialization function with unified serialize method for improved clarity and maintainability --- src/backend/base/langflow/schema/artifact.py | 4 +- src/backend/base/langflow/schema/schema.py | 4 +- src/backend/base/langflow/schema/serialize.py | 43 +------------------ 3 files changed, 5 insertions(+), 46 deletions(-) diff --git a/src/backend/base/langflow/schema/artifact.py b/src/backend/base/langflow/schema/artifact.py index 03be57867aa4..d5b248f28151 100644 --- a/src/backend/base/langflow/schema/artifact.py +++ b/src/backend/base/langflow/schema/artifact.py @@ -9,7 +9,7 @@ from langflow.schema.dataframe import DataFrame from langflow.schema.encoders import CUSTOM_ENCODERS from langflow.schema.message import Message -from langflow.schema.serialize import recursive_serialize_or_str +from langflow.serialization.serialization import serialize class ArtifactType(str, Enum): @@ -56,7 +56,7 @@ def _to_list_of_dicts(raw): raw_ = [] for item in raw: if hasattr(item, "dict") or hasattr(item, "model_dump"): - raw_.append(recursive_serialize_or_str(item)) + raw_.append(serialize(item)) else: raw_.append(str(item)) return raw_ diff --git a/src/backend/base/langflow/schema/schema.py b/src/backend/base/langflow/schema/schema.py index 428c24e87878..fb7ada8e02f3 100644 --- a/src/backend/base/langflow/schema/schema.py +++ b/src/backend/base/langflow/schema/schema.py @@ -8,7 +8,7 @@ from langflow.schema.data import Data from langflow.schema.dataframe import DataFrame from langflow.schema.message import Message -from langflow.schema.serialize import recursive_serialize_or_str +from langflow.serialization.serialization import serialize INPUT_FIELD_NAME = "input_value" @@ -110,7 +110,7 @@ def build_output_logs(vertex, result) -> dict: case LogType.ARRAY: if isinstance(message, DataFrame): message = message.to_dict(orient="records") - message = [recursive_serialize_or_str(item) for item in message] + message = [serialize(item) for item in message] name = output.get("name", f"output_{index}") outputs |= {name: OutputValue(message=message, type=type_).model_dump()} diff --git a/src/backend/base/langflow/schema/serialize.py b/src/backend/base/langflow/schema/serialize.py index d3f98053d028..272445c3efc0 100644 --- a/src/backend/base/langflow/schema/serialize.py +++ b/src/backend/base/langflow/schema/serialize.py @@ -1,11 +1,7 @@ -from collections.abc import AsyncIterator, Generator, Iterator -from datetime import datetime from typing import Annotated from uuid import UUID -from loguru import logger -from pydantic import BaseModel, BeforeValidator -from pydantic.v1 import BaseModel as BaseModelV1 +from pydantic import BeforeValidator def str_to_uuid(v: str | UUID) -> UUID: @@ -15,40 +11,3 @@ def str_to_uuid(v: str | UUID) -> UUID: UUIDstr = Annotated[UUID, BeforeValidator(str_to_uuid)] - - -def recursive_serialize_or_str(obj): - try: - if isinstance(obj, type) and issubclass(obj, BaseModel | BaseModelV1): - # This a type BaseModel and not an instance of it - return repr(obj) - if isinstance(obj, str): - return obj - if isinstance(obj, datetime): - return obj.isoformat() - if isinstance(obj, dict): - return {k: recursive_serialize_or_str(v) for k, v in obj.items()} - if isinstance(obj, list): - return [recursive_serialize_or_str(v) for v in obj] - if isinstance(obj, BaseModel | BaseModelV1): - if hasattr(obj, "model_dump"): - obj_dict = obj.model_dump() - elif hasattr(obj, "dict"): - obj_dict = obj.dict() - return {k: recursive_serialize_or_str(v) for k, v in obj_dict.items()} - - if isinstance(obj, AsyncIterator | Generator | Iterator): - # contain memory addresses - # without consuming the iterator - # return list(obj) consumes the iterator - # return f"{obj}" this generates '' - # it is not useful - return "Unconsumed Stream" - if hasattr(obj, "dict") and not isinstance(obj, type): - return {k: recursive_serialize_or_str(v) for k, v in obj.dict().items()} - if hasattr(obj, "model_dump") and not isinstance(obj, type): - return {k: recursive_serialize_or_str(v) for k, v in obj.model_dump().items()} - return str(obj) - except Exception: # noqa: BLE001 - logger.debug(f"Cannot serialize object {obj}") - return str(obj) From 9acdbfc2c103069e5f4fd19c0029b776f1d412a8 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 13:06:56 -0300 Subject: [PATCH 07/23] refactor: Replace custom serialization logic with unified serialize function for improved consistency and clarity --- src/backend/base/langflow/graph/schema.py | 6 ++-- src/backend/base/langflow/graph/utils.py | 32 ++----------------- .../langflow/graph/vertex/vertex_types.py | 5 +-- 3 files changed, 9 insertions(+), 34 deletions(-) diff --git a/src/backend/base/langflow/graph/schema.py b/src/backend/base/langflow/graph/schema.py index d792868c16db..407b30640918 100644 --- a/src/backend/base/langflow/graph/schema.py +++ b/src/backend/base/langflow/graph/schema.py @@ -3,8 +3,8 @@ from pydantic import BaseModel, Field, field_serializer, model_validator -from langflow.graph.utils import serialize_field from langflow.schema.schema import OutputValue, StreamURL +from langflow.serialization import serialize from langflow.utils.schemas import ChatOutputResponse, ContainsEnumMeta @@ -23,8 +23,8 @@ class ResultData(BaseModel): @field_serializer("results") def serialize_results(self, value): if isinstance(value, dict): - return {key: serialize_field(val) for key, val in value.items()} - return serialize_field(value) + return {key: serialize(val) for key, val in value.items()} + return serialize(value) @model_validator(mode="before") @classmethod diff --git a/src/backend/base/langflow/graph/utils.py b/src/backend/base/langflow/graph/utils.py index 5589c0b3d78c..e75e4f7b17e4 100644 --- a/src/backend/base/langflow/graph/utils.py +++ b/src/backend/base/langflow/graph/utils.py @@ -6,14 +6,12 @@ from typing import TYPE_CHECKING, Any from uuid import UUID -from langchain_core.documents import Document from loguru import logger -from pydantic import BaseModel -from pydantic.v1 import BaseModel as V1BaseModel from langflow.interface.utils import extract_input_variables_from_prompt from langflow.schema.data import Data from langflow.schema.message import Message +from langflow.serialization import serialize from langflow.services.database.models.transactions.crud import log_transaction as crud_log_transaction from langflow.services.database.models.transactions.model import TransactionBase from langflow.services.database.models.vertex_builds.crud import log_vertex_build as crud_log_vertex_build @@ -68,30 +66,6 @@ def flatten_list(list_of_lists: list[list | Any]) -> list: return new_list -def serialize_field(value): - """Serialize field. - - Unified serialization function for handling both BaseModel and Document types, - including handling lists of these types. - """ - if isinstance(value, list | tuple): - return [serialize_field(v) for v in value] - if isinstance(value, Document): - return value.to_json() - if isinstance(value, BaseModel): - return serialize_field(value.model_dump()) - if isinstance(value, dict): - return {k: serialize_field(v) for k, v in value.items()} - if isinstance(value, V1BaseModel): - if hasattr(value, "to_json"): - return value.to_json() - return value.dict() - # Handle datetime objects - if hasattr(value, "isoformat"): - return value.isoformat() - return str(value) - - def get_artifact_type(value, build_result) -> str: result = ArtifactType.UNKNOWN match value: @@ -186,9 +160,9 @@ async def log_vertex_build( valid=valid, params=str(params) if params else None, # Serialize data using our custom serializer - data=serialize_field(data), + data=serialize(data), # Serialize artifacts using our custom serializer - artifacts=serialize_field(artifacts) if artifacts else None, + artifacts=serialize(artifacts) if artifacts else None, ) async with session_getter(get_db_service()) as session: inserted = await crud_log_vertex_build(session, vertex_build) diff --git a/src/backend/base/langflow/graph/vertex/vertex_types.py b/src/backend/base/langflow/graph/vertex/vertex_types.py index 45412da0b4ce..cebc0cb9c24a 100644 --- a/src/backend/base/langflow/graph/vertex/vertex_types.py +++ b/src/backend/base/langflow/graph/vertex/vertex_types.py @@ -10,13 +10,14 @@ from loguru import logger from langflow.graph.schema import CHAT_COMPONENTS, RECORDS_COMPONENTS, InterfaceComponentTypes, ResultData -from langflow.graph.utils import UnbuiltObject, log_vertex_build, rewrite_file_path, serialize_field +from langflow.graph.utils import UnbuiltObject, log_vertex_build, rewrite_file_path from langflow.graph.vertex.base import Vertex from langflow.graph.vertex.exceptions import NoComponentInstanceError from langflow.schema import Data from langflow.schema.artifact import ArtifactType from langflow.schema.message import Message from langflow.schema.schema import INPUT_FIELD_NAME +from langflow.serialization import serialize from langflow.template.field.base import UNDEFINED, Output from langflow.utils.schemas import ChatOutputResponse, DataOutputResponse from langflow.utils.util import unescape_string @@ -478,6 +479,6 @@ def built_object_repr(self): def dict_to_codeblock(d: dict) -> str: - serialized = {key: serialize_field(val) for key, val in d.items()} + serialized = {key: serialize(val) for key, val in d.items()} json_str = json.dumps(serialized, indent=4) return f"```json\n{json_str}\n```" From bbb52867b6d222a726df2c7b0011c1dee9017659 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 13:16:46 -0300 Subject: [PATCH 08/23] refactor: Enhance serialization logic by adding instance handling and streamlining type checks --- .../langflow/serialization/serialization.py | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/backend/base/langflow/serialization/serialization.py b/src/backend/base/langflow/serialization/serialization.py index c69f0d4e013f..cb1e400d5e51 100644 --- a/src/backend/base/langflow/serialization/serialization.py +++ b/src/backend/base/langflow/serialization/serialization.py @@ -86,6 +86,11 @@ def _serialize_primitive(obj: Any, *_) -> Any: return None +def _serialize_instance(obj: Any, *_) -> str: + """Handle regular class instances by converting to string.""" + return str(obj) + + def _serialize_dispatcher(obj: Any, max_length: int | None, max_items: int | None) -> Any | None: """Dispatch object to appropriate serializer.""" # Handle primitive types first @@ -118,16 +123,15 @@ def _serialize_dispatcher(obj: Any, max_length: int | None, max_items: int | Non return _serialize_dict(obj, max_length, max_items) case list() | tuple(): return _serialize_list_tuple(obj, max_length, max_items) + case object() if not isinstance(obj, type): # Match any instance that's not a class + return _serialize_instance(obj, max_length, max_items) + case object() if hasattr(obj, "_name_"): # Enum case + return f"{obj.__class__.__name__}.{obj._name_}" + case object() if hasattr(obj, "__name__") and hasattr(obj, "__bound__"): # TypeVar case + return repr(obj) + case object() if hasattr(obj, "__origin__") or hasattr(obj, "__parameters__"): # Type alias/generic case + return repr(obj) case _: - # Handle enums - if hasattr(obj, "_name_"): # Enum check - return f"{obj.__class__.__name__}.{obj._name_}" - # Handle TypeVars - if hasattr(obj, "__name__") and hasattr(obj, "__bound__"): - return repr(obj) - # Handle type aliases and generic types - if hasattr(obj, "__origin__") or hasattr(obj, "__parameters__"): - return repr(obj) return None @@ -174,8 +178,8 @@ def serialize( if hasattr(obj, "dict") and not isinstance(obj, type): return serialize(obj.dict(), max_length, max_items) - # Final fallback to string conversion - if to_str or not isinstance(obj, type): # Convert instances to string + # Final fallback to string conversion only if explicitly requested + if to_str: return str(obj) except Exception as e: # noqa: BLE001 From 4b563dc81b357454d1e88e90f7fc7a056a6cd713 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 13:17:44 -0300 Subject: [PATCH 09/23] refactor: Remove custom dictionary serialization from ResultDataResponse for streamlined handling --- src/backend/base/langflow/api/v1/schemas.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/backend/base/langflow/api/v1/schemas.py b/src/backend/base/langflow/api/v1/schemas.py index ffecb8cbc4ee..9f5c31e77089 100644 --- a/src/backend/base/langflow/api/v1/schemas.py +++ b/src/backend/base/langflow/api/v1/schemas.py @@ -270,8 +270,6 @@ class ResultDataResponse(BaseModel): @classmethod def serialize_results(cls, v): """Serialize results with custom handling for special types and truncation.""" - if isinstance(v, dict): - return {key: serialize(val, max_length=MAX_TEXT_LENGTH) for key, val in v.items()} return serialize(v, max_length=MAX_TEXT_LENGTH) @model_serializer(mode="plain") From d8182dc937262289240b3aded79040dff8cbdee9 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 13:19:43 -0300 Subject: [PATCH 10/23] refactor: Enhance serialization in ResultDataResponse by adding max_items_length for improved handling of outputs, logs, messages, and artifacts --- src/backend/base/langflow/api/v1/schemas.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/backend/base/langflow/api/v1/schemas.py b/src/backend/base/langflow/api/v1/schemas.py index 9f5c31e77089..05f674cdf66f 100644 --- a/src/backend/base/langflow/api/v1/schemas.py +++ b/src/backend/base/langflow/api/v1/schemas.py @@ -17,7 +17,7 @@ from langflow.services.database.models.user import UserRead from langflow.services.settings.feature_flags import FeatureFlags from langflow.services.tracing.schema import Log -from langflow.utils.constants import MAX_TEXT_LENGTH +from langflow.utils.constants import MAX_ITEMS_LENGTH, MAX_TEXT_LENGTH from langflow.utils.util_strings import truncate_long_strings @@ -270,17 +270,17 @@ class ResultDataResponse(BaseModel): @classmethod def serialize_results(cls, v): """Serialize results with custom handling for special types and truncation.""" - return serialize(v, max_length=MAX_TEXT_LENGTH) + return serialize(v, max_length=MAX_TEXT_LENGTH, max_items=MAX_ITEMS_LENGTH) @model_serializer(mode="plain") def serialize_model(self) -> dict: """Custom serializer for the entire model.""" return { "results": self.serialize_results(self.results), - "outputs": serialize(self.outputs, max_length=MAX_TEXT_LENGTH), - "logs": serialize(self.logs, max_length=MAX_TEXT_LENGTH), - "message": serialize(self.message, max_length=MAX_TEXT_LENGTH), - "artifacts": serialize(self.artifacts, max_length=MAX_TEXT_LENGTH), + "outputs": serialize(self.outputs, max_length=MAX_TEXT_LENGTH, max_items=MAX_ITEMS_LENGTH), + "logs": serialize(self.logs, max_length=MAX_TEXT_LENGTH, max_items=MAX_ITEMS_LENGTH), + "message": serialize(self.message, max_length=MAX_TEXT_LENGTH, max_items=MAX_ITEMS_LENGTH), + "artifacts": serialize(self.artifacts, max_length=MAX_TEXT_LENGTH, max_items=MAX_ITEMS_LENGTH), "timedelta": self.timedelta, "duration": self.duration, "used_frozen_result": self.used_frozen_result, From a898ab1f1bc87182294cf58c2b12596cf58e43cb Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 13:19:55 -0300 Subject: [PATCH 11/23] refactor: Move MAX_ITEMS_LENGTH and MAX_TEXT_LENGTH constants to serialization module for better organization --- src/backend/base/langflow/api/v1/schemas.py | 2 +- src/backend/base/langflow/serialization/constants.py | 2 ++ src/backend/base/langflow/serialization/serialization.py | 2 +- src/backend/base/langflow/services/database/utils.py | 2 +- src/backend/base/langflow/utils/constants.py | 3 --- src/backend/base/langflow/utils/util_strings.py | 2 +- src/backend/tests/unit/serialization/test_serialization.py | 2 +- src/backend/tests/unit/utils/test_truncate_long_strings.py | 2 +- .../tests/unit/utils/test_truncate_long_strings_on_objects.py | 2 +- 9 files changed, 9 insertions(+), 10 deletions(-) create mode 100644 src/backend/base/langflow/serialization/constants.py diff --git a/src/backend/base/langflow/api/v1/schemas.py b/src/backend/base/langflow/api/v1/schemas.py index 05f674cdf66f..0d90d12df930 100644 --- a/src/backend/base/langflow/api/v1/schemas.py +++ b/src/backend/base/langflow/api/v1/schemas.py @@ -10,6 +10,7 @@ from langflow.schema import dotdict from langflow.schema.graph import Tweaks from langflow.schema.schema import InputType, OutputType, OutputValue +from langflow.serialization.constants import MAX_ITEMS_LENGTH, MAX_TEXT_LENGTH from langflow.serialization.serialization import serialize from langflow.services.database.models.api_key.model import ApiKeyRead from langflow.services.database.models.base import orjson_dumps @@ -17,7 +18,6 @@ from langflow.services.database.models.user import UserRead from langflow.services.settings.feature_flags import FeatureFlags from langflow.services.tracing.schema import Log -from langflow.utils.constants import MAX_ITEMS_LENGTH, MAX_TEXT_LENGTH from langflow.utils.util_strings import truncate_long_strings diff --git a/src/backend/base/langflow/serialization/constants.py b/src/backend/base/langflow/serialization/constants.py new file mode 100644 index 000000000000..51973eecc6e3 --- /dev/null +++ b/src/backend/base/langflow/serialization/constants.py @@ -0,0 +1,2 @@ +MAX_TEXT_LENGTH = 20000 +MAX_ITEMS_LENGTH = 1000 diff --git a/src/backend/base/langflow/serialization/serialization.py b/src/backend/base/langflow/serialization/serialization.py index cb1e400d5e51..88b4a6babeaf 100644 --- a/src/backend/base/langflow/serialization/serialization.py +++ b/src/backend/base/langflow/serialization/serialization.py @@ -9,7 +9,7 @@ from pydantic import BaseModel from pydantic.v1 import BaseModel as BaseModelV1 -from langflow.utils.constants import MAX_ITEMS_LENGTH, MAX_TEXT_LENGTH +from langflow.serialization.constants import MAX_ITEMS_LENGTH, MAX_TEXT_LENGTH def _serialize_str(obj: str, max_length: int | None, _) -> str: diff --git a/src/backend/base/langflow/services/database/utils.py b/src/backend/base/langflow/services/database/utils.py index 9143dd7ff90d..9af153507dc8 100644 --- a/src/backend/base/langflow/services/database/utils.py +++ b/src/backend/base/langflow/services/database/utils.py @@ -10,7 +10,7 @@ from sqlmodel import text from sqlmodel.ext.asyncio.session import AsyncSession -from langflow.utils import constants +from langflow.serialization import constants if TYPE_CHECKING: from langflow.services.database.service import DatabaseService diff --git a/src/backend/base/langflow/utils/constants.py b/src/backend/base/langflow/utils/constants.py index 3234230d6df3..3e9e3545b7c1 100644 --- a/src/backend/base/langflow/utils/constants.py +++ b/src/backend/base/langflow/utils/constants.py @@ -173,6 +173,3 @@ def python_function(text: str) -> str: MESSAGE_SENDER_USER = "User" MESSAGE_SENDER_NAME_AI = "AI" MESSAGE_SENDER_NAME_USER = "User" - -MAX_TEXT_LENGTH = 20000 -MAX_ITEMS_LENGTH = 1000 diff --git a/src/backend/base/langflow/utils/util_strings.py b/src/backend/base/langflow/utils/util_strings.py index 47503f7846e7..954e46004216 100644 --- a/src/backend/base/langflow/utils/util_strings.py +++ b/src/backend/base/langflow/utils/util_strings.py @@ -1,6 +1,6 @@ from sqlalchemy.engine import make_url -from langflow.utils import constants +from langflow.serialization import constants def truncate_long_strings(data, max_length=None): diff --git a/src/backend/tests/unit/serialization/test_serialization.py b/src/backend/tests/unit/serialization/test_serialization.py index f79650bff73b..7e454fba02fa 100644 --- a/src/backend/tests/unit/serialization/test_serialization.py +++ b/src/backend/tests/unit/serialization/test_serialization.py @@ -6,8 +6,8 @@ from hypothesis import given, settings from hypothesis import strategies as st from langchain_core.documents import Document +from langflow.serialization.constants import MAX_ITEMS_LENGTH, MAX_TEXT_LENGTH from langflow.serialization.serialization import serialize, serialize_or_str -from langflow.utils.constants import MAX_ITEMS_LENGTH, MAX_TEXT_LENGTH from pydantic import BaseModel as PydanticBaseModel from pydantic.v1 import BaseModel as PydanticV1BaseModel diff --git a/src/backend/tests/unit/utils/test_truncate_long_strings.py b/src/backend/tests/unit/utils/test_truncate_long_strings.py index 5a08d5744848..bdb9d855d9a0 100644 --- a/src/backend/tests/unit/utils/test_truncate_long_strings.py +++ b/src/backend/tests/unit/utils/test_truncate_long_strings.py @@ -1,7 +1,7 @@ import math import pytest -from langflow.utils.constants import MAX_TEXT_LENGTH +from langflow.serialization.constants import MAX_TEXT_LENGTH from langflow.utils.util_strings import truncate_long_strings diff --git a/src/backend/tests/unit/utils/test_truncate_long_strings_on_objects.py b/src/backend/tests/unit/utils/test_truncate_long_strings_on_objects.py index eafc3f10b140..20af26e6dbfe 100644 --- a/src/backend/tests/unit/utils/test_truncate_long_strings_on_objects.py +++ b/src/backend/tests/unit/utils/test_truncate_long_strings_on_objects.py @@ -1,5 +1,5 @@ import pytest -from langflow.utils.constants import MAX_TEXT_LENGTH +from langflow.serialization.constants import MAX_TEXT_LENGTH from langflow.utils.util_strings import truncate_long_strings From 1a0116dbd9386a1f3406ac78a4bfca2888acf04e Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 13:44:50 -0300 Subject: [PATCH 12/23] refactor: Simplify message serialization in Log model by utilizing unified serialize function --- .../base/langflow/services/tracing/schema.py | 21 +++---------------- 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/src/backend/base/langflow/services/tracing/schema.py b/src/backend/base/langflow/services/tracing/schema.py index 4485f578ca5c..cbfc9dee09ae 100644 --- a/src/backend/base/langflow/services/tracing/schema.py +++ b/src/backend/base/langflow/services/tracing/schema.py @@ -1,11 +1,10 @@ import logging -from fastapi.encoders import jsonable_encoder from pydantic import BaseModel, field_serializer -from pydantic.v1 import BaseModel as V1BaseModel from pydantic_core import PydanticSerializationError from langflow.schema.log import LoggableType +from langflow.serialization.serialization import serialize logger = logging.getLogger(__name__) @@ -18,22 +17,8 @@ class Log(BaseModel): @field_serializer("message") def serialize_message(self, value): try: - # We need to make sure everything inside the message has been serialized - if isinstance(value, dict): - return {key: self.serialize_message(value[key]) for key in value} - if isinstance(value, list): - return [self.serialize_message(item) for item in value] - # To json is for LangChain Serializable objects - if hasattr(value, "dict") and isinstance(value, V1BaseModel): - # This is for Pydantic V1 models - return value.dict() - if hasattr(value, "to_json"): - return value.to_json() - if isinstance(value, BaseModel): - return value.model_dump(exclude_none=True) - value = jsonable_encoder(value) + return serialize(value) except UnicodeDecodeError: return str(value) # Fallback to string representation except PydanticSerializationError: - return str(value) - return value + return str(value) # Fallback to string for Pydantic errors From ba01a10fa505e3391db948df363a266bb60fd360 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 15:56:06 -0300 Subject: [PATCH 13/23] refactor: Remove unnecessary pytest marker from TestSerializationHypothesis class --- src/backend/tests/unit/serialization/test_serialization.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/backend/tests/unit/serialization/test_serialization.py b/src/backend/tests/unit/serialization/test_serialization.py index 7e454fba02fa..f79932f8c2ba 100644 --- a/src/backend/tests/unit/serialization/test_serialization.py +++ b/src/backend/tests/unit/serialization/test_serialization.py @@ -2,7 +2,6 @@ from datetime import datetime, timezone from typing import Any -import pytest from hypothesis import given, settings from hypothesis import strategies as st from langchain_core.documents import Document @@ -46,7 +45,6 @@ class LegacyModel(PydanticV1BaseModel): value: int -@pytest.mark.unit class TestSerializationHypothesis: """Hypothesis-based property tests for serialization logic.""" From 01676d5c7fa06e88d3ca70311027420d77694cbd Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 16:21:02 -0300 Subject: [PATCH 14/23] optimize _serialize_bytes Co-authored-by: codeflash-ai[bot] <148906541+codeflash-ai[bot]@users.noreply.github.com> --- .../base/langflow/serialization/serialization.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/backend/base/langflow/serialization/serialization.py b/src/backend/base/langflow/serialization/serialization.py index 88b4a6babeaf..e6ade735711d 100644 --- a/src/backend/base/langflow/serialization/serialization.py +++ b/src/backend/base/langflow/serialization/serialization.py @@ -21,10 +21,13 @@ def _serialize_str(obj: str, max_length: int | None, _) -> str: def _serialize_bytes(obj: bytes, max_length: int | None, _) -> str: """Decode bytes to string and truncate if max_length provided.""" - decoded = obj.decode("utf-8", errors="ignore") - if max_length is None: - return decoded - return decoded[:max_length] + "..." if len(decoded) > max_length else decoded + if max_length is not None: + return ( + obj[:max_length].decode("utf-8", errors="ignore") + "..." + if len(obj) > max_length + else obj.decode("utf-8", errors="ignore") + ) + return obj.decode("utf-8", errors="ignore") def _serialize_datetime(obj: datetime, *_) -> str: From 7ebb30465628d121658677300867ed8fd1cc74da Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 16:27:11 -0300 Subject: [PATCH 15/23] feat: Add support for numpy integer type serialization --- src/backend/base/langflow/serialization/serialization.py | 3 +++ .../tests/unit/serialization/test_serialization.py | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/src/backend/base/langflow/serialization/serialization.py b/src/backend/base/langflow/serialization/serialization.py index e6ade735711d..77e8c42b7289 100644 --- a/src/backend/base/langflow/serialization/serialization.py +++ b/src/backend/base/langflow/serialization/serialization.py @@ -86,6 +86,9 @@ def _serialize_primitive(obj: Any, *_) -> Any: """Handle primitive types without conversion.""" if obj is None or isinstance(obj, int | float | bool): return obj + # Handle numpy integer types + if hasattr(obj, "dtype") and "int" in str(obj.dtype): + return int(obj) return None diff --git a/src/backend/tests/unit/serialization/test_serialization.py b/src/backend/tests/unit/serialization/test_serialization.py index f79932f8c2ba..2f822ed84f60 100644 --- a/src/backend/tests/unit/serialization/test_serialization.py +++ b/src/backend/tests/unit/serialization/test_serialization.py @@ -2,6 +2,7 @@ from datetime import datetime, timezone from typing import Any +import numpy as np from hypothesis import given, settings from hypothesis import strategies as st from langchain_core.documents import Document @@ -242,3 +243,10 @@ class Box(Generic[T]): result: str = serialize(Box[int]) assert result == repr(Box[int]) + + def test_numpy_int64_serialization(self) -> None: + """Test serialization of numpy.int64 values.""" + np_int = np.int64(42) + result = serialize(np_int) + assert result == 42 + assert isinstance(result, int) From f16110bfc80365de3157d56e8e03b22eaf213848 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 16:55:29 -0300 Subject: [PATCH 16/23] feat: Enhance serialization with support for pandas and numpy types --- .../langflow/serialization/serialization.py | 52 +++++++++++++++++-- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/src/backend/base/langflow/serialization/serialization.py b/src/backend/base/langflow/serialization/serialization.py index 77e8c42b7289..4a45de4449ec 100644 --- a/src/backend/base/langflow/serialization/serialization.py +++ b/src/backend/base/langflow/serialization/serialization.py @@ -1,9 +1,11 @@ from collections.abc import AsyncIterator, Generator, Iterator from datetime import datetime, timezone from decimal import Decimal -from typing import Any +from typing import Any, cast from uuid import UUID +import numpy as np +import pandas as pd from langchain_core.documents import Document from loguru import logger from pydantic import BaseModel @@ -84,11 +86,8 @@ def _serialize_list_tuple(obj: list | tuple, max_length: int | None, max_items: def _serialize_primitive(obj: Any, *_) -> Any: """Handle primitive types without conversion.""" - if obj is None or isinstance(obj, int | float | bool): + if obj is None or isinstance(obj, int | float | bool | complex | str): return obj - # Handle numpy integer types - if hasattr(obj, "dtype") and "int" in str(obj.dtype): - return int(obj) return None @@ -97,6 +96,31 @@ def _serialize_instance(obj: Any, *_) -> str: return str(obj) +def _truncate_value(value: Any, max_length: int | None, max_items: int | None) -> Any: + """Truncate value based on its type and provided limits.""" + if isinstance(value, str) and max_length is not None and len(value) > max_length: + return value[:max_length] + if isinstance(value, list | tuple) and max_items is not None and len(value) > max_items: + return value[:max_items] + return value + + +def _serialize_dataframe(obj: pd.DataFrame, max_length: int | None, max_items: int | None) -> list[dict]: + """Serialize pandas DataFrame to a dictionary format.""" + if max_items is not None and len(obj) > max_items: + obj = obj.head(max_items) + obj = obj.apply(lambda x: x.apply(lambda y: _truncate_value(y, max_length, max_items))) + return obj.to_dict(orient="records") + + +def _serialize_series(obj: pd.Series, max_length: int | None, max_items: int | None) -> dict: + """Serialize pandas Series to a dictionary format.""" + if max_items is not None and len(obj) > max_items: + obj = obj.head(max_items) + obj = obj.apply(lambda x: _truncate_value(x, max_length, max_items)) + return obj.to_dict() + + def _serialize_dispatcher(obj: Any, max_length: int | None, max_items: int | None) -> Any | None: """Dispatch object to appropriate serializer.""" # Handle primitive types first @@ -127,6 +151,10 @@ def _serialize_dispatcher(obj: Any, max_length: int | None, max_items: int | Non return _serialize_pydantic_v1(obj, max_length, max_items) case dict(): return _serialize_dict(obj, max_length, max_items) + case pd.DataFrame(): + return _serialize_dataframe(obj, max_length, max_items) + case pd.Series(): + return _serialize_series(obj, max_length, max_items) case list() | tuple(): return _serialize_list_tuple(obj, max_length, max_items) case object() if not isinstance(obj, type): # Match any instance that's not a class @@ -138,6 +166,20 @@ def _serialize_dispatcher(obj: Any, max_length: int | None, max_items: int | Non case object() if hasattr(obj, "__origin__") or hasattr(obj, "__parameters__"): # Type alias/generic case return repr(obj) case _: + # Handle numpy numeric types (int, float, bool, complex) + if hasattr(obj, "dtype"): + if np.issubdtype(obj.dtype, np.number) and hasattr(obj, "item"): + return obj.item() + if np.issubdtype(obj.dtype, np.bool_): + return bool(obj) + if np.issubdtype(obj.dtype, np.complexfloating): + return complex(cast(complex, obj)) + if np.issubdtype(obj.dtype, np.str_): + return str(obj) + if np.issubdtype(obj.dtype, np.bytes_) and hasattr(obj, "tobytes"): + return obj.tobytes().decode("utf-8", errors="ignore") + if np.issubdtype(obj.dtype, np.object_) and hasattr(obj, "item"): + return serialize(obj.item()) return None From 550bec4f47ea35c9e38e69b229f1a2238b9753bd Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 16:56:12 -0300 Subject: [PATCH 17/23] test: Add comprehensive serialization tests for numpy and pandas types --- .../unit/serialization/test_serialization.py | 92 +++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/src/backend/tests/unit/serialization/test_serialization.py b/src/backend/tests/unit/serialization/test_serialization.py index 2f822ed84f60..32a848b45820 100644 --- a/src/backend/tests/unit/serialization/test_serialization.py +++ b/src/backend/tests/unit/serialization/test_serialization.py @@ -3,6 +3,7 @@ from typing import Any import numpy as np +import pandas as pd from hypothesis import given, settings from hypothesis import strategies as st from langchain_core.documents import Document @@ -250,3 +251,94 @@ def test_numpy_int64_serialization(self) -> None: result = serialize(np_int) assert result == 42 assert isinstance(result, int) + + def test_numpy_numeric_serialization(self) -> None: + """Test serialization of various numpy numeric types.""" + # Test integers + assert serialize(np.int64(42)) == 42 + assert isinstance(serialize(np.int64(42)), int) + + # Test unsigned integers + assert serialize(np.uint64(42)) == 42 + assert isinstance(serialize(np.uint64(42)), int) + + # Test floats + assert serialize(np.float64(3.14)) == 3.14 + assert isinstance(serialize(np.float64(3.14)), float) + + # Test float32 (need to account for precision differences) + float32_val = serialize(np.float32(3.14)) + assert isinstance(float32_val, float) + assert abs(float32_val - 3.14) < 1e-6 # Check if close enough + + # Test bool + assert serialize(np.bool_(value=True)) is True + assert isinstance(serialize(np.bool_(value=True)), bool) + + # Test complex numbers + complex_val = serialize(np.complex64(1 + 2j)) + assert isinstance(complex_val, complex) + assert abs(complex_val - (1 + 2j)) < 1e-6 + + # Test strings + assert serialize(np.str_("hello")) == "hello" + assert isinstance(serialize(np.str_("hello")), str) + + # Test bytes + bytes_val = np.bytes_(b"world") + assert serialize(bytes_val) == "world" + assert isinstance(serialize(bytes_val), str) + + # Test unicode + assert serialize(np.str_("unicode")) == "unicode" + assert isinstance(serialize(np.str_("unicode")), str) + + # Test object arrays + obj_array = np.array([1, "two", 3.0], dtype=object) + result = serialize(obj_array[0]) + assert result == 1 + assert isinstance(result, int) + + result = serialize(obj_array[1]) + assert result == "two" + assert isinstance(result, str) + + result = serialize(obj_array[2]) + assert result == 3.0 + assert isinstance(result, float) + + def test_pandas_serialization(self) -> None: + """Test serialization of pandas DataFrame.""" + # Test DataFrame + test_df = pd.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "c"], "C": [1.1, 2.2, 3.3]}) + result = serialize(test_df) + assert isinstance(result, list) # DataFrame is serialized to list of records + assert len(result) == 3 + assert all(isinstance(row, dict) for row in result) + assert all("A" in row and "B" in row and "C" in row for row in result) + assert result[0] == {"A": 1, "B": "a", "C": 1.1} + + # Test DataFrame truncation + df_long = pd.DataFrame({"A": range(MAX_ITEMS_LENGTH + 100)}) + result = serialize(df_long, max_items=MAX_ITEMS_LENGTH) + assert isinstance(result, list) + assert len(result) == MAX_ITEMS_LENGTH + assert all("A" in row for row in result) + + def test_series_serialization(self) -> None: + """Test serialization of pandas Series.""" + # Test Series + series = pd.Series([1, 2, 3], name="test") + result = serialize(series) + assert isinstance(result, dict) + assert len(result) == 3 + assert all(isinstance(v, int) for v in result.values()) + + def test_series_truncation(self) -> None: + """Test truncation of pandas Series.""" + # Test Series + series_long = pd.Series(range(MAX_ITEMS_LENGTH + 100), name="test_long") + result = serialize(series_long, max_items=MAX_ITEMS_LENGTH) + assert isinstance(result, dict) + assert len(result) == MAX_ITEMS_LENGTH + assert all(isinstance(v, int) for v in result.values()) From 573130c1f00a389aee4c726531bda1d25cb5ecfe Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 18:33:44 -0300 Subject: [PATCH 18/23] fix: Update _serialize_dispatcher to return string representation for unsupported types --- src/backend/base/langflow/serialization/serialization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/base/langflow/serialization/serialization.py b/src/backend/base/langflow/serialization/serialization.py index 4a45de4449ec..82140e1f898c 100644 --- a/src/backend/base/langflow/serialization/serialization.py +++ b/src/backend/base/langflow/serialization/serialization.py @@ -180,7 +180,7 @@ def _serialize_dispatcher(obj: Any, max_length: int | None, max_items: int | Non return obj.tobytes().decode("utf-8", errors="ignore") if np.issubdtype(obj.dtype, np.object_) and hasattr(obj, "item"): return serialize(obj.item()) - return None + return str(obj) def serialize( From bcfe6f9ded091cd360b3212b6e9f5a58cbcdac5e Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 31 Jan 2025 18:35:37 -0300 Subject: [PATCH 19/23] fix: Update _serialize_dispatcher to return the object directly instead of its string representation --- src/backend/base/langflow/serialization/serialization.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/backend/base/langflow/serialization/serialization.py b/src/backend/base/langflow/serialization/serialization.py index 82140e1f898c..9e7f3b80e009 100644 --- a/src/backend/base/langflow/serialization/serialization.py +++ b/src/backend/base/langflow/serialization/serialization.py @@ -180,7 +180,7 @@ def _serialize_dispatcher(obj: Any, max_length: int | None, max_items: int | Non return obj.tobytes().decode("utf-8", errors="ignore") if np.issubdtype(obj.dtype, np.object_) and hasattr(obj, "item"): return serialize(obj.item()) - return str(obj) + return obj def serialize( @@ -201,10 +201,12 @@ def serialize( max_items: Maximum items in list-like structures, None for no truncation to_str: If True, return a string representation of the object if serialization fails """ + if obj is None: + return None try: # First try type-specific serialization result = _serialize_dispatcher(obj, max_length, max_items) - if result is not None or obj is None: # Special check for None since it's a valid result + if result is not None: # Special check for None since it's a valid result return result # Handle class-based Pydantic types and other types From 9b51778d6dbd75ffafd64361b6e55371c64862d4 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Mon, 3 Feb 2025 08:20:20 -0300 Subject: [PATCH 20/23] optmize conditional Co-authored-by: codeflash-ai[bot] <148906541+codeflash-ai[bot]@users.noreply.github.com> --- src/backend/base/langflow/serialization/serialization.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/base/langflow/serialization/serialization.py b/src/backend/base/langflow/serialization/serialization.py index 9e7f3b80e009..bb5ae5343923 100644 --- a/src/backend/base/langflow/serialization/serialization.py +++ b/src/backend/base/langflow/serialization/serialization.py @@ -98,8 +98,8 @@ def _serialize_instance(obj: Any, *_) -> str: def _truncate_value(value: Any, max_length: int | None, max_items: int | None) -> Any: """Truncate value based on its type and provided limits.""" - if isinstance(value, str) and max_length is not None and len(value) > max_length: - return value[:max_length] + if max_length is not None and isinstance(value, str) and len(value) > max_length: + if max_items is not None and isinstance(value, (list, tuple)) and len(value) > max_items: if isinstance(value, list | tuple) and max_items is not None and len(value) > max_items: return value[:max_items] return value From 824ae5fc15a69a9331ae60146571b9146450de7e Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Mon, 3 Feb 2025 08:20:51 -0300 Subject: [PATCH 21/23] optimize length check Co-authored-by: codeflash-ai[bot] <148906541+codeflash-ai[bot]@users.noreply.github.com> --- src/backend/base/langflow/serialization/serialization.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/base/langflow/serialization/serialization.py b/src/backend/base/langflow/serialization/serialization.py index bb5ae5343923..c1886c4ca730 100644 --- a/src/backend/base/langflow/serialization/serialization.py +++ b/src/backend/base/langflow/serialization/serialization.py @@ -16,8 +16,8 @@ def _serialize_str(obj: str, max_length: int | None, _) -> str: """Truncate long strings with ellipsis if max_length provided.""" - if max_length is None: - return obj + if max_length is None or len(obj) <= max_length: + return obj[:max_length] + "..." return obj[:max_length] + "..." if len(obj) > max_length else obj From 59ad780323368a3fb41355a722756252c4efb133 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Mon, 3 Feb 2025 08:22:27 -0300 Subject: [PATCH 22/23] fix: Update string and list truncation to include ellipsis for clarity --- src/backend/base/langflow/serialization/serialization.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/backend/base/langflow/serialization/serialization.py b/src/backend/base/langflow/serialization/serialization.py index c1886c4ca730..d0343c4a128d 100644 --- a/src/backend/base/langflow/serialization/serialization.py +++ b/src/backend/base/langflow/serialization/serialization.py @@ -17,8 +17,8 @@ def _serialize_str(obj: str, max_length: int | None, _) -> str: """Truncate long strings with ellipsis if max_length provided.""" if max_length is None or len(obj) <= max_length: + return obj return obj[:max_length] + "..." - return obj[:max_length] + "..." if len(obj) > max_length else obj def _serialize_bytes(obj: bytes, max_length: int | None, _) -> str: @@ -98,8 +98,8 @@ def _serialize_instance(obj: Any, *_) -> str: def _truncate_value(value: Any, max_length: int | None, max_items: int | None) -> Any: """Truncate value based on its type and provided limits.""" - if max_length is not None and isinstance(value, str) and len(value) > max_length: - if max_items is not None and isinstance(value, (list, tuple)) and len(value) > max_items: + if isinstance(value, str) and max_length is not None and len(value) > max_length: + return value[:max_length] if isinstance(value, list | tuple) and max_items is not None and len(value) > max_items: return value[:max_items] return value From e9010ce07ba2517d131c79bcbe71aadbf0e0ab98 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Mon, 3 Feb 2025 12:09:58 +0000 Subject: [PATCH 23/23] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Speed=20up=20functio?= =?UTF-8?q?n=20`=5Fserialize=5Fdataframe`=20by=20123%=20in=20PR=20#6044=20?= =?UTF-8?q?(`refactor-serialization`)=20Certainly!=20Here's=20a=20more=20e?= =?UTF-8?q?fficient=20version=20of=20the=20given=20program.=20The=20primar?= =?UTF-8?q?y=20optimization=20performed=20here=20is=20removing=20the=20red?= =?UTF-8?q?undant=20`.apply()`=20call=20and=20directly=20truncating=20valu?= =?UTF-8?q?es=20in=20a=20more=20performant=20way.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Changes Made. 1. **Removed redundant `apply` calls**: In the original code, there were nested `apply` calls which can be very slow on larger DataFrames. The new implementation converts the DataFrame to a list of dictionaries first and then truncates the values if needed. 2. **Optimized truncation logic**: Applied truncation directly while iterating over the dictionary after conversion from a DataFrame. This reduces overhead and improves readability. These changes should enhance the runtime performance of the serialization process, especially for larger DataFrames. --- .../langflow/serialization/serialization.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/backend/base/langflow/serialization/serialization.py b/src/backend/base/langflow/serialization/serialization.py index d0343c4a128d..3dfa116ebd6f 100644 --- a/src/backend/base/langflow/serialization/serialization.py +++ b/src/backend/base/langflow/serialization/serialization.py @@ -109,8 +109,15 @@ def _serialize_dataframe(obj: pd.DataFrame, max_length: int | None, max_items: i """Serialize pandas DataFrame to a dictionary format.""" if max_items is not None and len(obj) > max_items: obj = obj.head(max_items) - obj = obj.apply(lambda x: x.apply(lambda y: _truncate_value(y, max_length, max_items))) - return obj.to_dict(orient="records") + + data = obj.to_dict(orient="records") + + if max_length is not None: + for record in data: + for key, value in record.items(): + record[key] = _truncate_value(value, max_length) + + return data def _serialize_series(obj: pd.Series, max_length: int | None, max_items: int | None) -> dict: @@ -249,3 +256,10 @@ def serialize_or_str( max_items: Maximum items in list-like structures, None for no truncation """ return serialize(obj, max_length, max_items, to_str=True) + + +def _truncate_value(value, max_length: int | None): + """Truncate value if max_length is specified and value is a string.""" + if isinstance(value, str) and max_length is not None: + return value[:max_length] + return value