From 89b0be56a6bf9cfff285d511fb4d1f2ff7111294 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Wed, 8 Jan 2025 14:18:58 +0200 Subject: [PATCH 1/4] added flatten_lists option to FlattenFields transformation --- .../declarative_component_schema.yaml | 5 +++ .../models/declarative_component_schema.py | 5 +++ .../parsers/model_to_component_factory.py | 2 +- .../transformations/flatten_fields.py | 4 +- .../transformations/test_flatten_fields.py | 45 ++++++++++++++++--- 5 files changed, 54 insertions(+), 7 deletions(-) diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index 89c731075..481d58a4a 100644 --- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -1880,6 +1880,11 @@ definitions: type: type: string enum: [FlattenFields] + flatten_lists: + title: Flatten Lists + description: Whether to flatten lists or leave it as is. Default is True. + type: boolean + default: true $parameters: type: object additionalProperties: true diff --git a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index 5823b34c1..f739016a2 100644 --- a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -723,6 +723,11 @@ class KeysToSnakeCase(BaseModel): class FlattenFields(BaseModel): type: Literal["FlattenFields"] + flatten_lists: Optional[bool] = Field( + True, + description="Whether to flatten lists or leave it as is. Default is True.", + title="Flatten Lists", + ) parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 694cb1042..a45d9b159 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -633,7 +633,7 @@ def create_keys_to_snake_transformation( def create_flatten_fields( self, model: FlattenFieldsModel, config: Config, **kwargs: Any ) -> FlattenFields: - return FlattenFields() + return FlattenFields(flatten_lists=model.flatten_lists) @staticmethod def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]: diff --git a/airbyte_cdk/sources/declarative/transformations/flatten_fields.py b/airbyte_cdk/sources/declarative/transformations/flatten_fields.py index 0cc30839a..24bfba660 100644 --- a/airbyte_cdk/sources/declarative/transformations/flatten_fields.py +++ b/airbyte_cdk/sources/declarative/transformations/flatten_fields.py @@ -11,6 +11,8 @@ @dataclass class FlattenFields(RecordTransformation): + flatten_lists: bool = True + def transform( self, record: Dict[str, Any], @@ -39,7 +41,7 @@ def flatten_record(self, record: Dict[str, Any]) -> Dict[str, Any]: ) stack.append((value, new_key)) - elif isinstance(current_record, list): + elif isinstance(current_record, list) and self.flatten_lists: for i, item in enumerate(current_record): force_with_parent_name = True stack.append((item, f"{parent_key}.{i}")) diff --git a/unit_tests/sources/declarative/transformations/test_flatten_fields.py b/unit_tests/sources/declarative/transformations/test_flatten_fields.py index 4cf53a545..d99e8dad8 100644 --- a/unit_tests/sources/declarative/transformations/test_flatten_fields.py +++ b/unit_tests/sources/declarative/transformations/test_flatten_fields.py @@ -10,11 +10,12 @@ @pytest.mark.parametrize( - "input_record, expected_output", + "flatten_lists, input_record, expected_output", [ - ({"FirstName": "John", "LastName": "Doe"}, {"FirstName": "John", "LastName": "Doe"}), - ({"123Number": 123, "456Another123": 456}, {"123Number": 123, "456Another123": 456}), + (True, {"FirstName": "John", "LastName": "Doe"}, {"FirstName": "John", "LastName": "Doe"}), + (True, {"123Number": 123, "456Another123": 456}, {"123Number": 123, "456Another123": 456}), ( + True, { "NestedRecord": {"FirstName": "John", "LastName": "Doe"}, "456Another123": 456, @@ -26,10 +27,12 @@ }, ), ( + True, {"ListExample": [{"A": "a"}, {"A": "b"}]}, {"ListExample.0.A": "a", "ListExample.1.A": "b"}, ), ( + True, { "MixedCase123": { "Nested": [{"Key": {"Value": "test1"}}, {"Key": {"Value": "test2"}}] @@ -43,12 +46,44 @@ }, ), ( + True, {"List": ["Item1", "Item2", "Item3"]}, {"List.0": "Item1", "List.1": "Item2", "List.2": "Item3"}, ), + ( + False, + {"List": ["Item1", "Item2", "Item3"]}, + {"List": ["Item1", "Item2", "Item3"]}, + ), + ( + False, + { + "RootField": { + "NestedList": [{"Key": {"Value": "test1"}}, {"Key": {"Value": "test2"}}] + }, + "SimpleKey": "SimpleValue", + }, + { + "NestedList": [{"Key": {"Value": "test1"}}, {"Key": {"Value": "test2"}}], + "SimpleKey": "SimpleValue", + }, + ), + ( + False, + { + "RootField": {"List": [{"Key": {"Value": "test1"}}, {"Key": {"Value": "test2"}}]}, + "List": [1, 3, 6], + "SimpleKey": "SimpleValue", + }, + { + "RootField.List": [{"Key": {"Value": "test1"}}, {"Key": {"Value": "test2"}}], + "List": [1, 3, 6], + "SimpleKey": "SimpleValue", + }, + ), ], ) -def test_flatten_fields(input_record, expected_output): - flattener = FlattenFields() +def test_flatten_fields(flatten_lists, input_record, expected_output): + flattener = FlattenFields(flatten_lists=flatten_lists) flattener.transform(input_record) assert input_record == expected_output From ec5eb1611c1232cfcd6e38e2d5772e1c6db5e257 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Wed, 8 Jan 2025 14:40:11 +0200 Subject: [PATCH 2/4] added default flatten_lists to FlattenFields creation --- .../sources/declarative/parsers/model_to_component_factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index a45d9b159..2201c1cda 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -633,7 +633,7 @@ def create_keys_to_snake_transformation( def create_flatten_fields( self, model: FlattenFieldsModel, config: Config, **kwargs: Any ) -> FlattenFields: - return FlattenFields(flatten_lists=model.flatten_lists) + return FlattenFields(flatten_lists=model.flatten_lists if model.flatten_lists is not None else True) @staticmethod def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]: From 87bcbee7ada67a268456d8da78f6429186909d65 Mon Sep 17 00:00:00 2001 From: octavia-squidington-iii Date: Wed, 8 Jan 2025 12:43:12 +0000 Subject: [PATCH 3/4] Auto-fix lint and format issues --- .../sources/declarative/parsers/model_to_component_factory.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 2201c1cda..83f77514d 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -633,7 +633,9 @@ def create_keys_to_snake_transformation( def create_flatten_fields( self, model: FlattenFieldsModel, config: Config, **kwargs: Any ) -> FlattenFields: - return FlattenFields(flatten_lists=model.flatten_lists if model.flatten_lists is not None else True) + return FlattenFields( + flatten_lists=model.flatten_lists if model.flatten_lists is not None else True + ) @staticmethod def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]: From a96a160d4a9e6856630b9d25c7ea3e548454a07b Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Fri, 10 Jan 2025 14:13:28 +0200 Subject: [PATCH 4/4] updated test cases --- .../transformations/test_flatten_fields.py | 52 +++++++++++++------ 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/unit_tests/sources/declarative/transformations/test_flatten_fields.py b/unit_tests/sources/declarative/transformations/test_flatten_fields.py index d99e8dad8..bc34fad10 100644 --- a/unit_tests/sources/declarative/transformations/test_flatten_fields.py +++ b/unit_tests/sources/declarative/transformations/test_flatten_fields.py @@ -8,14 +8,27 @@ FlattenFields, ) +_FLATTEN_LISTS = True +_DO_NOT_FLATTEN_LISTS = False + @pytest.mark.parametrize( "flatten_lists, input_record, expected_output", [ - (True, {"FirstName": "John", "LastName": "Doe"}, {"FirstName": "John", "LastName": "Doe"}), - (True, {"123Number": 123, "456Another123": 456}, {"123Number": 123, "456Another123": 456}), - ( - True, + pytest.param( + _FLATTEN_LISTS, + {"FirstName": "John", "LastName": "Doe"}, + {"FirstName": "John", "LastName": "Doe"}, + id="flatten simple record with string values", + ), + pytest.param( + _FLATTEN_LISTS, + {"123Number": 123, "456Another123": 456}, + {"123Number": 123, "456Another123": 456}, + id="flatten simple record with int values", + ), + pytest.param( + _FLATTEN_LISTS, { "NestedRecord": {"FirstName": "John", "LastName": "Doe"}, "456Another123": 456, @@ -25,14 +38,16 @@ "LastName": "Doe", "456Another123": 456, }, + id="flatten record with nested dict", ), - ( - True, + pytest.param( + _FLATTEN_LISTS, {"ListExample": [{"A": "a"}, {"A": "b"}]}, {"ListExample.0.A": "a", "ListExample.1.A": "b"}, + id="flatten record with list values of dict items", ), - ( - True, + pytest.param( + _FLATTEN_LISTS, { "MixedCase123": { "Nested": [{"Key": {"Value": "test1"}}, {"Key": {"Value": "test2"}}] @@ -44,19 +59,22 @@ "Nested.1.Key.Value": "test2", "SimpleKey": "SimpleValue", }, + id="flatten record with nested dict of both list and string values", ), - ( - True, + pytest.param( + _FLATTEN_LISTS, {"List": ["Item1", "Item2", "Item3"]}, {"List.0": "Item1", "List.1": "Item2", "List.2": "Item3"}, + id="flatten record with list of str values", ), - ( - False, + pytest.param( + _DO_NOT_FLATTEN_LISTS, {"List": ["Item1", "Item2", "Item3"]}, {"List": ["Item1", "Item2", "Item3"]}, + id="flatten record with dict of list values, flatten_lists=False", ), - ( - False, + pytest.param( + _DO_NOT_FLATTEN_LISTS, { "RootField": { "NestedList": [{"Key": {"Value": "test1"}}, {"Key": {"Value": "test2"}}] @@ -67,9 +85,10 @@ "NestedList": [{"Key": {"Value": "test1"}}, {"Key": {"Value": "test2"}}], "SimpleKey": "SimpleValue", }, + id="flatten record with dict of list values and simple key, flatten_lists=False", ), - ( - False, + pytest.param( + _DO_NOT_FLATTEN_LISTS, { "RootField": {"List": [{"Key": {"Value": "test1"}}, {"Key": {"Value": "test2"}}]}, "List": [1, 3, 6], @@ -80,6 +99,7 @@ "List": [1, 3, 6], "SimpleKey": "SimpleValue", }, + id="flatten record with dict of list values and simple key with duplicated keys, flatten_lists=False", ), ], )