diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index 144ba5e3..7a3619a4 100644 --- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -1904,6 +1904,11 @@ definitions: type: type: string enum: [FlattenFields] + flatten_lists: + title: Flatten Lists + description: Whether to flatten lists or leave it as is. Default is True. + type: boolean + default: true $parameters: type: object additionalProperties: true diff --git a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index 66d4c0ad..df6925ea 100644 --- a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -756,6 +756,11 @@ class KeysReplace(BaseModel): class FlattenFields(BaseModel): type: Literal["FlattenFields"] + flatten_lists: Optional[bool] = Field( + True, + description="Whether to flatten lists or leave it as is. Default is True.", + title="Flatten Lists", + ) parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 39c07b04..8a31fab2 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -658,7 +658,9 @@ def create_keys_replace_transformation( def create_flatten_fields( self, model: FlattenFieldsModel, config: Config, **kwargs: Any ) -> FlattenFields: - return FlattenFields() + return FlattenFields( + flatten_lists=model.flatten_lists if model.flatten_lists is not None else True + ) @staticmethod def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]: diff --git a/airbyte_cdk/sources/declarative/transformations/flatten_fields.py b/airbyte_cdk/sources/declarative/transformations/flatten_fields.py index 0cc30839..24bfba66 100644 --- a/airbyte_cdk/sources/declarative/transformations/flatten_fields.py +++ b/airbyte_cdk/sources/declarative/transformations/flatten_fields.py @@ -11,6 +11,8 @@ @dataclass class FlattenFields(RecordTransformation): + flatten_lists: bool = True + def transform( self, record: Dict[str, Any], @@ -39,7 +41,7 @@ def flatten_record(self, record: Dict[str, Any]) -> Dict[str, Any]: ) stack.append((value, new_key)) - elif isinstance(current_record, list): + elif isinstance(current_record, list) and self.flatten_lists: for i, item in enumerate(current_record): force_with_parent_name = True stack.append((item, f"{parent_key}.{i}")) diff --git a/unit_tests/sources/declarative/transformations/test_flatten_fields.py b/unit_tests/sources/declarative/transformations/test_flatten_fields.py index 4cf53a54..bc34fad1 100644 --- a/unit_tests/sources/declarative/transformations/test_flatten_fields.py +++ b/unit_tests/sources/declarative/transformations/test_flatten_fields.py @@ -8,13 +8,27 @@ FlattenFields, ) +_FLATTEN_LISTS = True +_DO_NOT_FLATTEN_LISTS = False + @pytest.mark.parametrize( - "input_record, expected_output", + "flatten_lists, input_record, expected_output", [ - ({"FirstName": "John", "LastName": "Doe"}, {"FirstName": "John", "LastName": "Doe"}), - ({"123Number": 123, "456Another123": 456}, {"123Number": 123, "456Another123": 456}), - ( + pytest.param( + _FLATTEN_LISTS, + {"FirstName": "John", "LastName": "Doe"}, + {"FirstName": "John", "LastName": "Doe"}, + id="flatten simple record with string values", + ), + pytest.param( + _FLATTEN_LISTS, + {"123Number": 123, "456Another123": 456}, + {"123Number": 123, "456Another123": 456}, + id="flatten simple record with int values", + ), + pytest.param( + _FLATTEN_LISTS, { "NestedRecord": {"FirstName": "John", "LastName": "Doe"}, "456Another123": 456, @@ -24,12 +38,16 @@ "LastName": "Doe", "456Another123": 456, }, + id="flatten record with nested dict", ), - ( + pytest.param( + _FLATTEN_LISTS, {"ListExample": [{"A": "a"}, {"A": "b"}]}, {"ListExample.0.A": "a", "ListExample.1.A": "b"}, + id="flatten record with list values of dict items", ), - ( + pytest.param( + _FLATTEN_LISTS, { "MixedCase123": { "Nested": [{"Key": {"Value": "test1"}}, {"Key": {"Value": "test2"}}] @@ -41,14 +59,51 @@ "Nested.1.Key.Value": "test2", "SimpleKey": "SimpleValue", }, + id="flatten record with nested dict of both list and string values", ), - ( + pytest.param( + _FLATTEN_LISTS, {"List": ["Item1", "Item2", "Item3"]}, {"List.0": "Item1", "List.1": "Item2", "List.2": "Item3"}, + id="flatten record with list of str values", + ), + pytest.param( + _DO_NOT_FLATTEN_LISTS, + {"List": ["Item1", "Item2", "Item3"]}, + {"List": ["Item1", "Item2", "Item3"]}, + id="flatten record with dict of list values, flatten_lists=False", + ), + pytest.param( + _DO_NOT_FLATTEN_LISTS, + { + "RootField": { + "NestedList": [{"Key": {"Value": "test1"}}, {"Key": {"Value": "test2"}}] + }, + "SimpleKey": "SimpleValue", + }, + { + "NestedList": [{"Key": {"Value": "test1"}}, {"Key": {"Value": "test2"}}], + "SimpleKey": "SimpleValue", + }, + id="flatten record with dict of list values and simple key, flatten_lists=False", + ), + pytest.param( + _DO_NOT_FLATTEN_LISTS, + { + "RootField": {"List": [{"Key": {"Value": "test1"}}, {"Key": {"Value": "test2"}}]}, + "List": [1, 3, 6], + "SimpleKey": "SimpleValue", + }, + { + "RootField.List": [{"Key": {"Value": "test1"}}, {"Key": {"Value": "test2"}}], + "List": [1, 3, 6], + "SimpleKey": "SimpleValue", + }, + id="flatten record with dict of list values and simple key with duplicated keys, flatten_lists=False", ), ], ) -def test_flatten_fields(input_record, expected_output): - flattener = FlattenFields() +def test_flatten_fields(flatten_lists, input_record, expected_output): + flattener = FlattenFields(flatten_lists=flatten_lists) flattener.transform(input_record) assert input_record == expected_output