From 60011ca4893715b22b7ee3e66bdda3dbcb9299e6 Mon Sep 17 00:00:00 2001
From: Jerry Guo
Date: Wed, 26 Nov 2025 12:05:01 +0100
Subject: [PATCH 01/11] logic to handle optional_extra

Signed-off-by: Jerry Guo
---
 docs/converters/vision_converter.md           |  27 +++
 .../converters/tabular_converter.py           |  44 ++++-
 .../unit/converters/test_tabular_converter.py | 176 ++++++++++++++++++
 3 files changed, 244 insertions(+), 3 deletions(-)

diff --git a/docs/converters/vision_converter.md b/docs/converters/vision_converter.md
index 2fee20e6..9d53ff35 100644
--- a/docs/converters/vision_converter.md
+++ b/docs/converters/vision_converter.md
@@ -52,6 +52,33 @@ Vision introduced UUID based identifier system since version 9.7. It is implemen
 An exemplary usage can be found in the example notebook as well as in the test cases.
 
+## Optional extra columns
+
+When working with Vision Excel exports, some metadata columns (like `GUID` or `StationID`) may not always be present, especially in partial exports. The `optional_extra` feature allows you to specify columns that are included in `extra_info` when present, but do not cause the conversion to fail when missing.
+
+**Syntax:**
+```yaml
+grid:
+  Transformers:
+    transformer:
+      id:
+        auto_id:
+          key: Number
+      # ... other fields ...
+      extra:
+        - ID    # Required - fails if missing
+        - Name  # Required - fails if missing
+        - optional_extra:
+            - GUID       # Optional - skipped if missing
+            - StationID  # Optional - skipped if missing
+```
+
+**Behavior:**
+- Required columns (listed directly under `extra`) raise a `KeyError` if missing
+- Optional columns (nested under `optional_extra`) are silently skipped if not found
+- If some optional columns are present and others missing, only the present ones are included in `extra_info`
+- This feature is particularly useful for handling different Vision export configurations or versions (a usage sketch is given below)
+
 ## Common/Known issues related to Vision
 
 So far we have the following issues known to us related to Vision-exported spreadsheets. We provide a solution from the user perspective to the best of our knowledge.
diff --git a/src/power_grid_model_io/converters/tabular_converter.py b/src/power_grid_model_io/converters/tabular_converter.py
index b596d1ac..8a98c86f 100644
--- a/src/power_grid_model_io/converters/tabular_converter.py
+++ b/src/power_grid_model_io/converters/tabular_converter.py
@@ -396,6 +396,7 @@ def _parse_col_def(  # pylint: disable = too-many-arguments,too-many-positional-
         col_def: Any,
         table_mask: Optional[np.ndarray],
         extra_info: Optional[ExtraInfo],
+        allow_missing: bool = False,
     ) -> pd.DataFrame:
         """Interpret the column definition and extract/convert/create the data as a pandas DataFrame.
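
For illustration, a minimal end-user sketch of the `optional_extra` feature documented above; the file names are placeholders, and the call pattern mirrors the Vision Excel tests added later in this series:

```python
# Sketch: converting a Vision export in which optional columns may be absent.
# "my_export.xlsx" and "my_mapping.yaml" are placeholder paths.
from power_grid_model_io.converters import VisionExcelConverter

converter = VisionExcelConverter(source_file="my_export.xlsx", mapping_file="my_mapping.yaml")
input_data, extra_info = converter.load_input_data()

# Columns listed under optional_extra (e.g. GUID, StationID) appear in
# extra_info only for exports that actually contain them.
for pgm_id, info in extra_info.items():
    print(pgm_id, info.get("GUID", "<no GUID in this export>"))
```
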
@@ -404,6 +405,7 @@ def _parse_col_def(  # pylint: disable = too-many-arguments,too-many-positional-
         table: str:
         col_def: Any:
         extra_info: Optional[ExtraInfo]:
+        allow_missing: bool: If True, a missing column yields an empty DataFrame instead of raising a KeyError
 
     Returns:
 
@@ -411,8 +413,19 @@ def _parse_col_def(  # pylint: disable = too-many-arguments,too-many-positional-
         if isinstance(col_def, (int, float)):
             return self._parse_col_def_const(data=data, table=table, col_def=col_def, table_mask=table_mask)
         if isinstance(col_def, str):
-            return self._parse_col_def_column_name(data=data, table=table, col_def=col_def, table_mask=table_mask)
+            return self._parse_col_def_column_name(
+                data=data, table=table, col_def=col_def, table_mask=table_mask, allow_missing=allow_missing
+            )
         if isinstance(col_def, dict):
+            # Check if this is an optional_extra wrapper
+            if len(col_def) == 1 and "optional_extra" in col_def:
+                # Extract the list of optional columns and parse it as a composite with allow_missing=True
+                optional_cols = col_def["optional_extra"]
+                if not isinstance(optional_cols, list):
+                    raise TypeError(f"optional_extra value must be a list, got {type(optional_cols).__name__}")
+                return self._parse_col_def_composite(
+                    data=data, table=table, col_def=optional_cols, table_mask=table_mask, allow_missing=True
+                )
             return self._parse_col_def_filter(
                 data=data,
                 table=table,
@@ -421,7 +434,9 @@ def _parse_col_def(  # pylint: disable = too-many-arguments,too-many-positional-
                 extra_info=extra_info,
             )
         if isinstance(col_def, list):
-            return self._parse_col_def_composite(data=data, table=table, col_def=col_def, table_mask=table_mask)
+            return self._parse_col_def_composite(
+                data=data, table=table, col_def=col_def, table_mask=table_mask, allow_missing=allow_missing
+            )
         raise TypeError(f"Invalid column definition: {col_def}")
 
     @staticmethod
@@ -454,6 +469,7 @@ def _parse_col_def_column_name(
         table: str,
         col_def: str,
         table_mask: Optional[np.ndarray] = None,
+        allow_missing: bool = False,
     ) -> pd.DataFrame:
         """Extract a column from the data. If the column doesn't exist, check if the col_def is a special float
         value, like 'inf'. If that's the case, create a single column pandas DataFrame containing the const value.
@@ -462,6 +478,7 @@ def _parse_col_def_column_name(
         data: TabularData:
         table: str:
         col_def: str:
+        allow_missing: bool: If True, return an empty DataFrame when the column is missing instead of raising a KeyError
 
     Returns:
 
@@ -486,6 +503,15 @@ def _parse_col_def_column_name(
             const_value = float(col_def)
         except ValueError:
             # pylint: disable=raise-missing-from
+            if allow_missing:
+                # Return an empty DataFrame with the correct number of rows when the column is optional and missing
+                self._log.debug(
+                    "Optional column not found",
+                    table=table,
+                    columns=" or ".join(f"'{col_name}'" for col_name in columns),
+                )
+                n_rows = len(table_data)
+                return pd.DataFrame(index=range(n_rows))
             columns_str = " and ".join(f"'{col_name}'" for col_name in columns)
             raise KeyError(f"Could not find column {columns_str} on table '{table}'")
 
@@ -778,6 +804,7 @@ def _parse_col_def_composite(
         table: str,
         col_def: list,
         table_mask: Optional[np.ndarray],
+        allow_missing: bool = False,
     ) -> pd.DataFrame:
         """Select multiple columns (each is created from a column definition) and return them as a new DataFrame.
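
The composite handling above can be pictured with a small standalone sketch in plain pandas, independent of the converter internals; the table and column names are illustrative:

```python
# Sketch of the composite/allow_missing idea: parse each sub-definition,
# let a missing optional column produce an empty frame, drop those, and
# concatenate whatever remains.
import pandas as pd

table = pd.DataFrame({"ID": ["N1", "N2"], "GUID": ["g1", "g2"]})
wanted = ["ID", "GUID", "StationID"]  # 'StationID' is missing from the table

parsed = [
    table[[col]] if col in table.columns else pd.DataFrame(index=table.index)
    for col in wanted
]
non_empty = [df for df in parsed if len(df.columns) > 0]
result = pd.concat(non_empty, axis=1) if non_empty else pd.DataFrame(index=table.index)
print(list(result.columns))  # ['ID', 'GUID'] -- 'StationID' is silently skipped
```

Reusing the table's own index for the empty frames keeps the row count consistent with the masked table, which is what the later review fixes in this series converge on.
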
@@ -785,6 +812,7 @@ def _parse_col_def_composite( data: TabularData: table: str: col_def: list: + allow_missing: bool: If True, skip missing columns instead of raising errors Returns: @@ -797,10 +825,20 @@ def _parse_col_def_composite( col_def=sub_def, table_mask=table_mask, extra_info=None, + allow_missing=allow_missing, ) for sub_def in col_def ] - return pd.concat(columns, axis=1) + # Filter out empty DataFrames (from missing optional columns) + non_empty_columns = [col for col in columns if not col.empty and len(col.columns) > 0] + if not non_empty_columns: + # If all columns are missing, return an empty DataFrame with the correct number of rows + table_data = data[table] + if table_mask is not None: + table_data = table_data[table_mask] + n_rows = len(table_data) + return pd.DataFrame(index=range(n_rows)) + return pd.concat(non_empty_columns, axis=1) def _get_id(self, table: str, key: Mapping[str, int], name: Optional[str]) -> int: """ diff --git a/tests/unit/converters/test_tabular_converter.py b/tests/unit/converters/test_tabular_converter.py index 157bc807..c96d8eac 100644 --- a/tests/unit/converters/test_tabular_converter.py +++ b/tests/unit/converters/test_tabular_converter.py @@ -462,6 +462,7 @@ def test_parse_col_def(converter: TabularConverter, tabular_data_no_units_no_sub table="nodes", col_def="col_name", table_mask=None, + allow_missing=False, ) # type(col_def) == dict @@ -499,6 +500,7 @@ def test_parse_col_def(converter: TabularConverter, tabular_data_no_units_no_sub table="nodes", col_def=[], table_mask=None, + allow_missing=False, ) @@ -1356,3 +1358,177 @@ def bool_fn_filter(row: pd.Series, **kwargs): def test_parse_table_filters__ndarray_data(converter: TabularConverter): numpy_tabular_data = TabularData(numpy_table=np.ones((4, 3))) assert converter._parse_table_filters(data=numpy_tabular_data, table="numpy_table", filtering_functions=[]) is None + + +def test_optional_extra__all_columns_present(converter: TabularConverter): + """Test optional_extra when all optional columns are present in the data""" + # Arrange + data = TabularData( + test_table=pd.DataFrame( + {"id": [1, 2], "name": ["node1", "node2"], "guid": ["guid1", "guid2"], "station": ["st1", "st2"]} + ) + ) + col_def = {"optional_extra": ["guid", "station"]} + + # Act + result = converter._parse_col_def( + data=data, table="test_table", col_def=col_def, table_mask=None, extra_info=None, allow_missing=False + ) + + # Assert + assert list(result.columns) == ["guid", "station"] + assert list(result["guid"]) == ["guid1", "guid2"] + assert list(result["station"]) == ["st1", "st2"] + + +def test_optional_extra__some_columns_missing(converter: TabularConverter): + """Test optional_extra when some optional columns are missing from the data""" + # Arrange + data = TabularData(test_table=pd.DataFrame({"id": [1, 2], "name": ["node1", "node2"], "guid": ["guid1", "guid2"]})) + col_def = {"optional_extra": ["guid", "station"]} # 'station' is missing + + # Act + result = converter._parse_col_def( + data=data, table="test_table", col_def=col_def, table_mask=None, extra_info=None, allow_missing=False + ) + + # Assert - only 'guid' should be present + assert list(result.columns) == ["guid"] + assert list(result["guid"]) == ["guid1", "guid2"] + + +def test_optional_extra__all_columns_missing(converter: TabularConverter): + """Test optional_extra when all optional columns are missing from the data""" + # Arrange + data = TabularData(test_table=pd.DataFrame({"id": [1, 2], "name": ["node1", "node2"]})) + col_def = 
{"optional_extra": ["guid", "station"]} # Both are missing + + # Act + result = converter._parse_col_def( + data=data, table="test_table", col_def=col_def, table_mask=None, extra_info=None, allow_missing=False + ) + + # Assert - should return empty DataFrame with correct number of rows + assert len(result) == 2 + assert len(result.columns) == 0 + + +def test_optional_extra__mixed_with_required(converter: TabularConverter): + """Test mixing required and optional extra columns""" + # Arrange + data = TabularData(test_table=pd.DataFrame({"id": [1, 2], "name": ["node1", "node2"], "guid": ["guid1", "guid2"]})) + # Mix required columns with optional_extra + col_def = ["name", {"optional_extra": ["guid", "station"]}] + + # Act + result = converter._parse_col_def( + data=data, table="test_table", col_def=col_def, table_mask=None, extra_info=None, allow_missing=False + ) + + # Assert - should have 'name' and 'guid', but not 'station' + assert list(result.columns) == ["name", "guid"] + assert list(result["name"]) == ["node1", "node2"] + assert list(result["guid"]) == ["guid1", "guid2"] + + +def test_optional_extra__in_extra_info(converter: TabularConverter): + """Test that optional_extra works correctly with _handle_extra_info""" + # Arrange + data = TabularData( + test_table=pd.DataFrame( + {"id": [1, 2], "name": ["node1", "node2"], "guid": ["guid1", "guid2"]} # 'station' is missing + ) + ) + uuids = np.array([100, 200]) + extra_info: ExtraInfo = {} + col_def = {"optional_extra": ["guid", "station"]} + + # Act + converter._handle_extra_info( + data=data, table="test_table", col_def=col_def, uuids=uuids, table_mask=None, extra_info=extra_info + ) + + # Assert - only 'guid' should be in extra_info, not 'station' + assert 100 in extra_info + assert 200 in extra_info + assert "guid" in extra_info[100] + assert "guid" in extra_info[200] + assert extra_info[100]["guid"] == "guid1" + assert extra_info[200]["guid"] == "guid2" + assert "station" not in extra_info[100] + assert "station" not in extra_info[200] + + +def test_optional_extra__all_missing_no_extra_info(converter: TabularConverter): + """Test that when all optional columns are missing, no extra_info entries are created""" + # Arrange + data = TabularData(test_table=pd.DataFrame({"id": [1, 2], "name": ["node1", "node2"]})) # Both optional missing + uuids = np.array([100, 200]) + extra_info: ExtraInfo = {} + col_def = {"optional_extra": ["guid", "station"]} + + # Act + converter._handle_extra_info( + data=data, table="test_table", col_def=col_def, uuids=uuids, table_mask=None, extra_info=extra_info + ) + + # Assert - no entries should be added to extra_info + assert len(extra_info) == 0 + + +def test_optional_extra__invalid_type(): + """Test that optional_extra raises TypeError if value is not a list""" + # Arrange + converter = TabularConverter(mapping_file=MAPPING_FILE) + data = TabularData(test_table=pd.DataFrame({"id": [1, 2]})) + col_def = {"optional_extra": "not_a_list"} # Invalid: should be a list + + # Act & Assert + with pytest.raises(TypeError, match="optional_extra value must be a list"): + converter._parse_col_def( + data=data, table="test_table", col_def=col_def, table_mask=None, extra_info=None, allow_missing=False + ) + + +def test_optional_extra__integration(): + """Integration test for optional_extra feature using a complete mapping file""" + # Arrange + mapping_file = Path(__file__).parents[2] / "data" / "config" / "test_optional_extra_mapping.yaml" + converter = TabularConverter(mapping_file=mapping_file) + + # Create test data 
with some optional columns present and some missing + data = TabularData( + nodes=pd.DataFrame( + { + "node_id": [1, 2, 3], + "voltage": [10.5, 10.5, 0.4], + "ID": ["N1", "N2", "N3"], + "Name": ["Node 1", "Node 2", "Node 3"], + "GUID": ["guid-1", "guid-2", "guid-3"], + # Note: StationID column is missing (optional) + } + ) + ) + + extra_info: ExtraInfo = {} + + # Act + result = converter._parse_data(data=data, data_type=DatasetType.input, extra_info=extra_info) + + # Assert + assert ComponentType.node in result + assert len(result[ComponentType.node]) == 3 + + # Check that extra_info contains the required and present optional fields + for node_id in result[ComponentType.node]["id"]: + assert node_id in extra_info + assert "ID" in extra_info[node_id] + assert "Name" in extra_info[node_id] + assert "GUID" in extra_info[node_id] # Optional but present + assert "StationID" not in extra_info[node_id] # Optional and missing + + # Verify values + node_0_id = result[ComponentType.node]["id"][0] + assert extra_info[node_0_id]["ID"] == "N1" + assert extra_info[node_0_id]["Name"] == "Node 1" + assert extra_info[node_0_id]["GUID"] == "guid-1" From 7f00482667a09a7fd49b3537c6d0567c4ff239a2 Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Wed, 26 Nov 2025 12:08:01 +0100 Subject: [PATCH 02/11] ignored file Signed-off-by: Jerry Guo --- .../config/test_optional_extra_mapping.yaml | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 tests/data/config/test_optional_extra_mapping.yaml diff --git a/tests/data/config/test_optional_extra_mapping.yaml b/tests/data/config/test_optional_extra_mapping.yaml new file mode 100644 index 00000000..1b757b4e --- /dev/null +++ b/tests/data/config/test_optional_extra_mapping.yaml @@ -0,0 +1,24 @@ +# SPDX-FileCopyrightText: Contributors to the Power Grid Model project +# +# SPDX-License-Identifier: MPL-2.0 +--- +# Test mapping file for optional_extra feature +grid: + nodes: + node: + id: + auto_id: + key: node_id + u_rated: voltage + extra: + - ID + - Name + - optional_extra: + - GUID + - StationID + +units: + V: + kV: 1000.0 + +substitutions: {} From 5d78037cbcf734a7841b951c8a464cbf5dfaa7f1 Mon Sep 17 00:00:00 2001 From: Jerry Guo <6221579+Jerry-Jinfeng-Guo@users.noreply.github.com> Date: Wed, 26 Nov 2025 12:09:23 +0100 Subject: [PATCH 03/11] Update src/power_grid_model_io/converters/tabular_converter.py Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Jerry Guo <6221579+Jerry-Jinfeng-Guo@users.noreply.github.com> --- src/power_grid_model_io/converters/tabular_converter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/power_grid_model_io/converters/tabular_converter.py b/src/power_grid_model_io/converters/tabular_converter.py index 8a98c86f..373db46f 100644 --- a/src/power_grid_model_io/converters/tabular_converter.py +++ b/src/power_grid_model_io/converters/tabular_converter.py @@ -829,8 +829,8 @@ def _parse_col_def_composite( ) for sub_def in col_def ] - # Filter out empty DataFrames (from missing optional columns) - non_empty_columns = [col for col in columns if not col.empty and len(col.columns) > 0] + # Filter out DataFrames with no columns (from missing optional columns) + non_empty_columns = [col for col in columns if len(col.columns) > 0] if not non_empty_columns: # If all columns are missing, return an empty DataFrame with the correct number of rows table_data = data[table] From e375135169060124d3a6ec48b42daa3b1afe46c2 Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Thu, 27 
Nov 2025 13:27:43 +0100 Subject: [PATCH 04/11] added tests for optional_extra fields Signed-off-by: Jerry Guo --- .../vision/vision_optional_extra_full.xlsx | Bin 0 -> 5020 bytes .../vision_optional_extra_full.xlsx.license | 3 + .../vision/vision_optional_extra_mapping.yaml | 30 ++++ ...vision_optional_extra_mapping.yaml.license | 3 + .../vision/vision_optional_extra_minimal.xlsx | Bin 0 -> 4952 bytes ...vision_optional_extra_minimal.xlsx.license | 3 + .../vision/vision_optional_extra_partial.xlsx | Bin 0 -> 4987 bytes ...vision_optional_extra_partial.xlsx.license | 3 + .../unit/converters/test_tabular_converter.py | 140 ++++++++++++++++++ .../converters/test_vision_excel_converter.py | 102 +++++++++++++ 10 files changed, 284 insertions(+) create mode 100644 tests/data/vision/vision_optional_extra_full.xlsx create mode 100644 tests/data/vision/vision_optional_extra_full.xlsx.license create mode 100644 tests/data/vision/vision_optional_extra_mapping.yaml create mode 100644 tests/data/vision/vision_optional_extra_mapping.yaml.license create mode 100644 tests/data/vision/vision_optional_extra_minimal.xlsx create mode 100644 tests/data/vision/vision_optional_extra_minimal.xlsx.license create mode 100644 tests/data/vision/vision_optional_extra_partial.xlsx create mode 100644 tests/data/vision/vision_optional_extra_partial.xlsx.license diff --git a/tests/data/vision/vision_optional_extra_full.xlsx b/tests/data/vision/vision_optional_extra_full.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..dba721930ee22126ed9d54c597da9b3734832da8 GIT binary patch literal 5020 zcmZ`-1yodB+Z_-@y1S%nKw3Iw=u$vnBm@Q+Iz}1^2}$V=X^<8e8W})RT0j^|LO@hX zI{Zfc-{mX+_sy((@0zvF-gC}g`-%JLsNcXK0{{TG0B`Ml!60Jb?}ji4D3okrl43^2X4#ELf_Y0Ov`n{XuN#OdBKI`Sda7o5V9roxk?G4jHolhT10& zIsicY@BLajyFq?#csK!}c0+(D%uLB`DO)oV=W%A#N*N|1c!igBQnYC{#LBAC+1bOf z@wrT3-@A*`Pqb;%_n-=08)VGvZfVY-r9-2PCTrz$X# zr(;+cn$fhQ;FE!5Wd=HU#!gPryS8k(f5&@aYK z=E>%_$kjM<{yBS+j`(I9v|sXNpIS)U&A zRf{o{*f7Sy+bTXJ;El2<(NGHh*96U$OeqZD*7=B%^2Vl^_rW(li_Ng-N&8;rvv-iw zt&MjUmmdnU63g>bmsIa|;uL9^HW)(z-D3U&q$J(*!BMx=y~u{9A1#Av?yyXMu178y zgg_ngsFbbD{Y`(?2DPRNJ!Vju-*qz~oUo1fS3pE|s~m|K*AjBZKL z3kEsy(;D6m7;!FH(=vYkQLn3~)6PcLz`k#MLM1SGz&{DIMfIp)lKhq+zNAF9;fROd zQP0=h=)pOdup`a#>YkSvd}t!dg$`p`w>76L`@6%NLV4`^0_d34hoiZw_~KVF540zb z{g<{gZL@Gk9G~eiANA$^}fZJhE7nY2!q_R<1h&TcYDS) zbo{F2;gQQ-2@{XvrboFNbZBm6^amX(2Q$5$uf9A?Z5=2S^2zp>7_m|`d~MAf zO}ud5T+1yi zjE=75M7y!HE*u&9A89K)vCNcp)-lf{fmoGt`*~Z|h{YOI@dZg`Z)Jz2+3C=s7h(&f zYMQ_;%VWxpS{FONyS9WH<@;rhHf`D?3S<_)|?4(kq^#xvQ*6x!t5YtY>QR zFQ9cSB{25kO(7ZtsG9akxb1|L&lTormIw}AJS{!C4Q(BWDFoWO%TUc4n^D&vF6VN*g5z}REu_mpaDgzP17 zWl9+YaWMCq8D!Rdym0QO^tS0#1^c4?7@xb_ReloODIUP*KU%cY{9QV4qVX<^zYStC z3cLt%0{5I(3Czt1^Kn`}bQ|vP)2mM8`XHX$`Q_49CTH*K*(_^Fb%qfy3q_oIP@Oz$ z(?!(aE_V<1rWKeCdq{5*ayODBM}gL9>%pN~^2~a5GiQ?&cx|gX4<-W~J(V)ss2|1^ zQfzcn{|ry+XkZsp`(FFbMT=nwPtryzfgbMpl3wKe$i!ofrHKDQs$vt3LdOlqmh5>6 zv7EH{W}M0Uan9YRTW`H+R>Ws9n*GxrI&ORzA3xtf=++eMNTtyCJ>Sn~`j!`#HrlNi z?(J)qW|Q2jBTVM#P{9fWHJUfg>;cJ^t;~5}Vm+(YMc==#Z!Q4ty}=Wg@23&1wCS>u z`p9f`JYf}C3~6wx=JKR=WWF!n1x+KhDIZ*f)FSlhNA4{ z6u2ZRmPwSwYWWA$4>~q@RD1Nr-Y5ydNm8lWRzLN*YCUKo@qiPe3pTfkm-#>hY>M}n zc=51-;O9mpE1D)>r#3RPs(hF|&Qd)Q=KGf}1ImQ!k{f2Z6 zB}RiOp|d2@-K3xtT+{iM5OAuZ&l_(6~DplXgCHx#*|H{43o z!EUbJ?c6o_vdH)NwnzBFbn%#`0_kwk zf;BB1srjoN{F<(w*pFYM-k|+mK+cO7^@&iG^#WBysD2j^PiHrKcUuSq=6>hb<5v~& zPhN=4!*pFrds9CZdHYG?*f*teTUGds7F8!s@B6f|4u3}l!-R(Bt{icjFQ>pS zHI^6iWPq+S^`Tcrv!O(Xqzc1jfXGbD^1OufXxGTltbIBW5v;5uc4OvIR~eQ_RfzIq zM&ur+RVAk?rlgYyZ&BJ0->`%yJje#;poa;R3Ya=%(_-NmJ$Z|+;)<5ZNjjx8UEEQM 
z@YipewfF9C`66h8s?wu*rv~C{>oVF3d62ra!6aG=`knjkK_W>-+^mgzHl@%P>fEa^ zzjB7-_94^QrXUfXfJ8kqp7V0YqPUN6vg(R+G)37R+IjP!g1EYol~<#jQe{EL;Lj>W zC#=CkV)3{Un-A4rbSkp0QRrc0-|VX!a4n^p+MHp(90C0`ok{y8m_h97=aK1B8?H?}a3Yhl>Y3^r6^DIRU(Zjr&*GK&w`P5piQgUEdM71C zPgr2OKSm8_@9tFGh|tWj+8N$8jFn1B%(=v3n9QoX#oz)Q7W1PgC@@|5wJh-emTkMd z99Sq@49!j3D732}5j5&G@{2z_Ijm~e!YDuYT<;GtuVep0#ng z2Nd3soz{YYAJ4sqfL@84f2Zm@;} z>uduH9^oDhPKlN`Ctz`O8&wsxfDUl*mCGwE8SWFS+*JR;(=}B3IHiD*H|B}$JIj#W zO+=ZVotk;FQn%Bj>rSYUuMRo~)snnJl35kKm5bX&%OhzkAttHY{fHJ&Q>#P)v{5(z zP|jr^)?3nO68rMl;MthU)7N3)bWYK_fJWuK_d>Co%Dx-t6R$sP_uWLkt9hS!3p>UF zMoRwuM@i%6dG5k%tq$WKkU%?)u>gYUC=L!|Z|t+8_7}3D1QGK}xfOV+mYM#2*$?V) zJ*W!zC!*(!L}U1O>~0+AFq)U`Ty&p;>iu-CAn1*=zyQ)1I+#edPuJ_-2XzsK!FZVFASu;3h~$dm91d9{Hg+no+@Fq35*C z`kGzbK7rJTtB~L>Imd9>vf4vqeI?x#J>_kQba~CgO5k4V5okROkDCB=)@nLAS$ubG zRr;m%*?5>v@7@ud*!dILa zo&xO!5LH-7kEAr?XI`$zS86QJi-b6Fa1f30pi(@3EF7-LU3$L*=|ShEGQX>m#&i6? zy_Xu9@nid^GxslBD8Nah)!a$m&?;ZbE&mqhoW##2pbDsZ+J(2;nBs`o$!k?;w* zdq_*g8e^c093QoHW4jeaiF@J0=Z|Kxi=>fvwB?-xt0gpC6`(5WLIk_R69Iz92Y4b% z>uM~?U>dK<^(&8M)$u%LLy8QgfysROq5%57qqxbfYD?yNAMoa>tF0bR?7Ssgw5vt^ zl}PXa;g_e^dVO;$BQ_y40Du{FP{Q9q_A^ymI6K?_46_*Bai=bRyo+RR#u|!xCVXAf zFgTMYUQERCSj&6n2%f&snCe{QE1C(@qon!SmU$Bj%9Y8W%_Ki>N((2uc|c670dv&DDm=nb8ftRD|p^vNk| z^79XgtTy8jPi`t!)JCvlPb)f!>M4BL-@LKy*3WMDxtJ@bfWXUy;0UD6rbqR1zEnfc z^qdQx1yimTY7a=0vLmc?*B)#Szx#Tf%Jm{=o$zlXu`*=lSE6)1L1|3$Ti@Sa`nSrz zeAN&yP4J8#FSMFu9U!7priiIwV#vcsgn7V1Lh`l>r}0iGTAT#z@#`GkfSm!YTw5|v zrpR*?P8L93xJJAguPeI(z=D!;ZJWyOk7*v+NmV8?;a~v4Xzr;#cX zHMP1?rM{X30%qd_8on&1HODn^L9^7RDZfgzF`hsQY0as0Jtb7{=3M5r zx@PcK0}?z@{mWT^e&%$CFRsW zinZ|(rbU*?o0W1B#+^QMD}&UENE%Fu()QI98cX!D+8eiRw#Q)tn(R6J(W9-i7YJ*d z{y|L~V)0sxfWB;&fdzdl78R@FYA>eOhhNU+wZP9h;)ho+@t}9tf-DR_jh^`SQvRSF z*(2`y_C~h0ZCZ;a;8WB`SUhX5W#aO)Srf9(UWvs|zI|F8t2>EZlh`O^nn zhh9(If1qP{f2Hy3!0UX&z|^RuiUR+Sn7__*J&^wJT%maW aOIYcsW1@O&000~Hl|iL&m7l{bz<&T;2DDrN literal 0 HcmV?d00001 diff --git a/tests/data/vision/vision_optional_extra_full.xlsx.license b/tests/data/vision/vision_optional_extra_full.xlsx.license new file mode 100644 index 00000000..76010591 --- /dev/null +++ b/tests/data/vision/vision_optional_extra_full.xlsx.license @@ -0,0 +1,3 @@ +SPDX-FileCopyrightText: Contributors to the Power Grid Model project + +SPDX-License-Identifier: MPL-2.0 diff --git a/tests/data/vision/vision_optional_extra_mapping.yaml b/tests/data/vision/vision_optional_extra_mapping.yaml new file mode 100644 index 00000000..4979d149 --- /dev/null +++ b/tests/data/vision/vision_optional_extra_mapping.yaml @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Contributors to the Power Grid Model project +# +# SPDX-License-Identifier: MPL-2.0 +--- +# Test mapping file for optional_extra feature with Vision Excel format +id_reference: + nodes_table: Nodes + number: Number + node_number: Node.Number + sub_number: Subnumber + +grid: + Nodes: + node: + id: + auto_id: + key: Number + u_rated: Unom + extra: + - ID + - Name + - optional_extra: + - GUID + - StationID + +units: + V: + kV: 1000.0 + +substitutions: {} diff --git a/tests/data/vision/vision_optional_extra_mapping.yaml.license b/tests/data/vision/vision_optional_extra_mapping.yaml.license new file mode 100644 index 00000000..76010591 --- /dev/null +++ b/tests/data/vision/vision_optional_extra_mapping.yaml.license @@ -0,0 +1,3 @@ +SPDX-FileCopyrightText: Contributors to the Power Grid Model project + +SPDX-License-Identifier: MPL-2.0 diff --git a/tests/data/vision/vision_optional_extra_minimal.xlsx b/tests/data/vision/vision_optional_extra_minimal.xlsx new file mode 100644 index 
0000000000000000000000000000000000000000..af66a5d79368820e4ac8ca1bd5a9ee165f86100d GIT binary patch literal 4952 zcmZ`-1yod9+a6lFr9l`{x*G`@5CK63B?O5XVCWcOC<*D3ZcrMekuDihK)QsXE+HTy zr6B!}y6(DH{`;M?&fe>+b)NU^{jBGW{p#Gu!lnWM05*N2 z+@eraGxdN$H_$^oq`T-jdA}N3j1Qz zh{;K7esI#_@6I|rb*I`5@1wM|RYQJ=d2#4eCa=^5{g^>`cdfLHcjQJ9^3 z+Ih1VVqRXDgWkp1#WehkiRR&r{*$w{)2`jyUtT=(K7JW382BdmsYgyb;7@YWo@4^a zZ~=f!E&zZMO%B3d5N-o?fc_qZf6}vKWb8I4MfH5Qaui{G~I0)usjQ zXtM;98${cCLgIq_-l<8D4JH%s#A`KY$YJwsoeit1ZEQ+;9)1EF04c0;fOMq81(n z!R&Kr)gWfRrhGPvqhMVlr>D1S*I7h=P!EEmp?MgTkCix^!fFpr?0e4V78Rf)TQW={ z0ggfp2Dki%or>19jl%u{cXoH!THkqO*EcqN~a6HnP5qnv;CA%~0oBg|dCH&ev*eK-V(OgAb;hSi^2NTD> zOWPSXnFP_s4<_t97A9iCd&@@=#O?bJLc1!$drL>*+b~Y9iwe8^t(!K=pw(?J3hKIb z#fe)0184W?593(LraY(566t4|S{9U^>=}HO4ky3t?ZKXgjngHIfn2jcg zaPGFb_NpjhMoW3vxs?xp{|sns9?In?OX(`ZXszrsa�|z#%fSdqS|SEoaU0D7ngH zWMnNn$`#nMaAe?n^gzuKI8)N`mTe{x#IE}4J%95Wxm4XfViC$aH?l%fZFLwi^YMj~ zb3%U9_Lh+^Q~EZv-*~?i9d=ymlrH}`$Kc*v&<`>Y{Ys|WC$};CGrR)O*pYtgd|rwH zHAV{UlGqz@zMg4BQpbLIrW_L`#ZV$4u2M(}J10en^m8qB%1hpYxy#H6#l1v9_EU|x zm$0|MA~@&JrYJoURLSrx)Mi}H>k{WAQ=EV?j)4i&n&B;oH3-(S$6U!ClV09Q&a13o zK_@})T6Z&`-lZhS%!s0Q452+R_WsLI+_T~7I1=)BQ%Ur4`|3@o zm_jkL2m#K16SKm)7lJ@CxwloPBG4P-YHUujv-C$`hqRx7??}OJ(>M8?@dintuQhTa z(s&W%Xxx2PAv`xDCctg^*mdZAAGk7s=cDwijxQHB3fcQRr?c!qmFb53z?-r61KujJ zH=aih?A`9h--H-*;17Z)ppp?3*~$!#TYBF$l4jN`o46b0jMug*bKnYmBPViCH)@9n zMO7MH?|-gOY_H>#()d>W!C9MmkVw{AE}p60<294`*^!C+8n8e}FGZ!1Ub!91p*d?_ zMk+fsu8CmcVXRZv$yN`7enol~yU928vBSp4v9Yraq;6H-u3R!xU)Vt|>(`u+)R8Wg zP*3ltsn$uoI$~4~_GRpRpa!$XnSDO0Wr!L7YrGegx|jzK_05Ehd$D+9bA2?UR5zVB zQl30r9gAN@6+-JAD|tNV9M~R8cfwLBtxE?Mq18xzrV)MPgQ|90`C4l2kxRq!_pgY^ zwQmwqs9Pq|6l&xi(&@Et@ZRgzmwKlvT2GNe+q(Lx&qZ6Wk;1*66jP+BMY_ZbDr{YN zu*6S<&u1KFNU@@2vNN@jky+uz=6;&ufiyd~aPC(lU6`prb8;`!0S>ZxObth#2V zXAe0BTon_T#E5&7Mr2LrYkVv&CB{%<|J^;zKu95@f9fe=+W}D%5A|@lg0!D1--{4! 
z({T<8s)#Bg99A0F)!p3$TaT!jQ@#L)Ag~Bzie^A194RmAs`3%caab|ONf?Y;0S~p% zw{x1^@A7iY;^(?TnNG^r(#V4j^U{`?--fzncr|vamR%W4GoOeoNjsxt7O_k}QSzze zKS%mTMEH}JTC_J;J~-jXl&$sN54e?aK8?yhT39CXnJhIbfbf#vxsT_=5|3*cW&BO- z=}Rh$^1|x!-Z^0+Pk)0$q8ux!rBKy$deN!KGU;dnC#5OH^*g9ZVw)pkF~&F?Q{l9F ziM%y!0=fCiUE-?F?wG%JBHv;BnLu#QKk0nH001O#007$G3B<$6)y~ZZ3Wd7~{+fPe z5#L0VW1bK?$>N+~Sj4pGa1Pu7R zF9)X$wwZ%FG;q8|q;;>D{rlS$$z`~*x9Y_O0tQ=w#t4kVx$?y5z0HSuB+7a`m^fMi z&sC50KYe?#9Iaf*wcYWoCfRi(u&-G$2MZIIVU;oLg3$~=N*Wls?{aW=HO_Gr?(2Rf z|IN|ckdpX*|02v`4s#*rbx6pUG|%J<7OC%tI0ob@15f%DbSg$fmKuX)QJ;&Qj}I*p zhxEK-d9J>m(1T;5Rh{WtrFRf}I|%CdkZ6%b;b_T4|Xj^FwlV-1}F&orA^Z$$2dN(a&r?SO)EFB1^!w8fHnV zU5*njyTPK~I+$FvOG@^MPb-)p&aUUpPvjw@ta7*BBbz~uEi!qq2Ho86ip~e{-l7JR znAgXTUW}?guL%idbd1skG^piFipFfJd2gJJ*L>XW!$p0ln#{O?A8i4rr2ckQ)UbK> zYN1BE-RKJHZ>u@#M=~AB#bxA)e_GJ?@=h>G_`K??GNKgA4Bx&iy|;w!wD||)QFDe8 z(L%enSjX8cW+l7lT_>PgADv4mX2UF>A7wNnTs+IGv!++??K!n|BVW-_kiodv3R}7t zcwvj|E-vO>CPDUA)uX>OfK*%-Fl^!5BuN-O;e5A;ufQYeWn30u$8Gi+PHDS%N<*G} z5=m;Vp^{~d$42_9y2)U*ZJ9JBt?%W0`zc4D^$?=lB%sq4(}{^f$+cDa*H))vAv#Ip z8S45P$MkXVrC444EtsB5tdds198=V*m>051pQv!OlLn%_UcmC-aASK2x8*_a!Hc?O z>|mqiU{#D8ipC8C9~1Fbnq zwEhUu^HW!-y_=xG&oL{$$+1g_NYSrnG+PL7lr>Bgx_=Y<1mjUI=|>8j2-U22m2(9y zv4nJ<@^TdO%rM0rsr>rL650#0hcKh5Mhx|Oj+e0EIOU2-K4$UIamBkROO+}kJ_SVq zI;(~@$ju_R{KsKWX0i(8QGySYoct?gG+mTo>i0!S_J+p&M2-)M#8uZdfJw&mh>7(} z_vL$IIcx?u(^dN?a+wPJnEH-lC$=gr*=oIvH&0w_zyvY#mK;$o7PXh+f&HXko?ole z&1eldMA2cE4ZTs)-%j>3%vv}(+5L30Xx%Z#P9dW6q}wc2H)~mmbxlL+S+$6w!;eRs zC)vVz`+}n@U!mU6kDDGP&d)Z_o7|*XnF!cS^zo#zaI}rH$}X%fMxqdE#MoKj?!^dY zg~Dp{Drgx-cMCin`gYQo$f}99u0C@SlY$$YhkKUXJLvv-mWPnEWjB^g;_m!64?NC& z+R;_&fq4jLxVVXF_`Zfx8H!qSc-=~b=%^#GhsA3q?YuVhqo@Tb%cJNutK<^1V8()_ z&F|%2axVl(i>bXpTphf;d!3UKv@uyxh3ZZg-{hk+}Uzmq{WNDYLm7;Ba zsdBcY3{S4TTB0D`IoNh;^awQ2oVKW zQmg~SbxKrlG))Y61;}s?ffN)y6$A}}!5FbJaPyjMe!ty*?N>Hb9*(c;57|L>xC4;E zFS~$zmrwlrlch9S7E<88KN@!ZWyA4MVAU(<3k(&Utr%Lx zuUH~-M!+wxBm61Y(;4b!@>59V30m4+LS*ROL5(>C z_zYebGMEvXIAd5opf9~lur?Y;iE7Vjyr!TI)+$3RwvJ?{lr6p_uiDJ|{xOkB*M|yI zA5YtYi49Rv%PM{EJiVSxqb|Xi?09hfg$|Mu@rK(waeGR%*43#5vE|92wWpQVVPPm2 z-TA>*gDy{8gJPx!xy_RJ`EGHO+o+i}n}C5%L}}2I6DH@+-m;1sAeHL4aMJ?Iq|I_g z8KVxbxs?Gr6%;*=OmW+4GQA~cNj26j>+LbPuoh>wP}E4v>^ahk;QfFW0l9QFwqIWs zuzx`x0#t_-Rw7tyzJEDW(l&n49yhdlK?IXr3$QTwH1fl{m*$FLc%QuU>$^MEt<&1{ zexD-$x?}2xW)cIF3h=+XAZX3}y8O}n{{NoHb@cU~#BVGB;Ez%DZ}i_CitF&}r}lr~ zL+IcC#~J>*f$LS<-v;UfNN)Tx@Sio@bt~61|KC>pF~|shS^1|9xDLG@xc`Ah5&aRy zuLG|K!he9z(8ULO>;HDx>)`8N{ttMH + +SPDX-License-Identifier: MPL-2.0 diff --git a/tests/data/vision/vision_optional_extra_partial.xlsx b/tests/data/vision/vision_optional_extra_partial.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..3346203d160872536242c553ffcd57c38e46e957 GIT binary patch literal 4987 zcmZ`-2Q(br`(8a*y)R32qFcR1i6y!O(Fqo-x9CFj9-ZjXMUT}Jv1$-qh%QPHL`(F@ zf0LZ=A1Yf5PP+Ry1#V6jF8BK>i;5&zDR_om`rk)Bos(MR zk8PzG(C=pU5)SV!eN57?1Zc#Eb}3Ah(;jByW zPJ_e--7Dc0&aH~3u#9YvbUOi#({%fz_^|0$42{lb53$}PpJQqLHQ#mEp#2xrJh{*T z0Fr;t*V5Sy_H)8RNk|n80pf5oMYp9K^=Q0@Surc+SjgZN9+nA_=9y3{t0re>PsgT6 znV`ORmuH`7Gp6pq)(ryf1o)uII`q_}-9NU0$6r=U|Ha^z>OPjWvOJ1ijxthdrU1I@v0G$FLcl zjr!r?SzC}NfVk1vx~L!uDx43A&>k{p2qP7zX&t)dfpEBo1ACH+04Y) z06;c706>N!$J>F=!xrWU`+XPqNzbmKk^8I|`QyEsQE!`LNBmxqCyfX)cev*q;#ugf z216K$ew>4kRbpu18zm9q!8C&1B=y!TDNOFIi(y5jjZHD1qc6M`o8gfuhu-Ehd~;K6 zO?->X4+L3Az-v4DOu(ysa7m>a6z)B;xSQq33dTQ3f`$B>@(E z;>1s@e=~5{xpYm#F!G~LXLpC4&22sVzOixTpy2+16s*>}$AuFVHv|bJC35tKJ^hcn zck^NgW<7))LCdQL-eQe|<1sFDn9JI2xt%#*9o`hl;UWs*qgEe|XR8xSUc}wkoHz+s z+Rn1g#)~u3oUr#=n23++ts3wgiCBDw1A!JLAR)1(PO-EtDJ2mzwqV`^Id 
zHFAy7%biK%#tBm+oDEu_8<}sPd{YW$?3u6m{w=+&zX;-+6Cg2cpArbKKr7vC%Nj|( zbl+UdD@~qOmmhX{#f^J#!EE#}g561y%uRyMM&5tq_&BkcRd8hQ3~yUQ%7#HNt=42@ zWGy$=jk#^%SU=!cQ^|>Wy1b*FX*vbUqL}xVr*(})tl=(!AldC3IpG<0T6E||xB_X< z!%q>tl_bk#0Zr{U-Y&&Oo>VwzNA3QR^il!A{f7oN_-Mp`cZSY4UyimQ0`Dtkh9KZTFwTqW@- zyq>w#gKcON0zyJ-Xdgw`j!XGoVVz|Q!FOH@V0&W8kYFXs!kFvc^L~D z5s+I0<&#F&@=$X_(%vy|jd29Yr9q`|vW)rLouR}>!&8ZbBuS>CsBed35=h?Ahl{CX z?iRS}V}8#y?YfKH3?8)p-uyWHJ;Z@K{zPe#N@b0Rt@QSmpb$=BrUCBjP(8c67ZE}- zW%Po0SO+ciGV8wHc%o^&ue7Rz{m_1l&5CwboCbG@2l57t6z{csm3}_nB+49MgPe#l zT7)_obzf8q%uWmOa#%ia8+zNPQ*zfg8Q@!NM2ECZ21|#r*fv6`Dcv?HG=&Ir9=? zxfzKqcoX*$oV(7pdb~j^;xm{n0T~Y*H$IGwU2Gt=YYX?J(!hO@hXssZo`+|QbSXsm z_?czcr1okFkvlq6vT#G2%$uhVxXG8T%z5&0LTa?p5AW%k3mElca3vJ@tHmmAx@@Ey zo2`x|tL$DIH?p#;eVIJZ)4h=9hu>ZLm5A0QH_X18C{#Z_URXVMN|sbs%l7FZ zLGw^VXHKQVrlb;F)B2K>fK7%rl+u4^A3fNrgf1xK9RKwpu!WOixJpJmP?0+%T*Gvn zm6SZX7Kp`2<+i%Fmu%-1JAKam#4%Jy(CR(afI<{f8set#L5KCIdX^18Y;Hwos14N4 zX0F=h>y*R8{$tK`QW`-e4LvG+woG3i;hyE&+^JakLw}0?OmIouWlmxd!}Jpww?ffl zWI%Lu5J`naduxs68EdvA!tdb8&8*9*xuWBRWuX6Lg<-K37s+i^9Djx+Y|B`quR>-| z$t|i%>Z*EY1%N(5`bWU$)?!N$ikTqD`VjJqKXI5@*vEFtPo=w7LC?C`9C>FCSXS2zMeuE7iBm)2OYNk+m~%-rl?&2-h`b$x zKIv5)Z8YT#zADpqjp{-7_dA3ElyeZR|TgTbx9s4!E#Rqo;;PXQC zZnU|*mC3?z^Dgk1cJ$dB_A-OKX4R8yZ&n!VBlKd|7*%hK7DrKIMO(Li+%PEdlo(xf zcYv{sRpJy;uowF4j2y7~OUs@7kaQ_mX=&og+Vokpil@w#`PB`Hpw*GhEsY1Cd(X~x zuB2+Y(!i9p7T<9CVX4haChQUDUJd@R@0!#;GC0PTQ4<;Uiyt!~aVO{bdwLS%n0DcF1Gc#G6Wj&kc|0)sXok4(AMW_JKtoN^$m4 zmordC+u5G|sH+Y4Yy8By1`lMKx0j&AHAJp5aG}qGrf}7NJ|?p%l6SD8;~0_KBosiQ zCq0OqOqc-;#IKn-@wNbf>>V!HAI;FhPtX)3^PSNVpDFUo8S8~(gRa0Gw&=0e#7K(e zSD$z~x!T_jQ)Qf1`u5n^y>%d9HZxyQsNXFR-QattI)0-xT;9hg#(5wmuW^hsp%N^z z7k{^Z)1`6}9j6-Wg(*MV$P?4>2{ZX~hQCGPe^fZHjViy6aRC5=-y(7M@Ns~-|Ew<- z^yZ@H_=&$$d?YGM&JcFJd#I&g{+tKDjVM6ZLY!oCW#BOB>$W7bxMzKZ0Y^ZE`u;n* zoEBP}iHi!-5gSISHe1-aqvvt^MfS$`18$+c+cO%}vWDA2C;6@S7riw0OO%rI&g|+q z+=q($J!v7EuaBiAXOX9ljc*SZN*b&ix$A9%3XSoP2i}WRv?SrMcNtU{w?dEb2$U)+ zE$Q!(C}XSs;OZPKdze!ke@UzR9{Adi0hynZ)Ox=!J|bKYke=LFW{!4QNt& zJ_(86RPx)n7=QU;yAONrUF~Gn4cs^j4>F3cKT4Z6FY*>%YP1{vfCbs9jRq1<#jvv* z`rw`yzkYf8rD_MSKSv{SKIq<(1;`M-^sr zb_+PBz%9a5w4NyLTf;weMlvheEp(i=SzfY<+b5A3a264YQm_w|FRMH-)K%0@(^1-% zcqXU*t%~~~{TR9)4!lJOJ#RCem?#llTb0hYJ|7F$N*&Kq)>SzHC3-9+XzOml@4F_* zsXv(o$L7U{NTz-w$I?h0i1Q7B7ro%X^b&Yo2)pZ1+AS%~@R^4*`h^N}WHE#T4-eUt z2rUEhPc>T;fQhBJ9Cx_wT9d>vb;3bO|PveOWlheL>f=$6id(X zY05bV)kvth%EOgaA%y!w}>zj&EmAjoI3e|m#McHYAF$n1lp$Ijg0ERxTurS)=8!)uD-Cin!LFepmEdVl=+#~ zc@s*im5C>tDgHiG7EX4F*108hWym>iB?8PGo$keGd6|;BhqbUuwC*;KOj!M-5zxAY zx}h<95uKC+laphH#4l9!GRKQw+_D=(B4uy>s}~O20rluA#lXB3Ta>VgY1Dy=T;&{v z+VHxyAn>>&xQD@a`q?ES;seBjh(RxI%{r~zJdCb*Y4cl^uT%&xQ5l6Vl)ZzCYrk_+ zggQPawnW+4;;VG*hSo~cCRHWQJLuoWunqS$b!6r4nK0^fn zSL3hkh?6*4(8b&1>RLL7CHN%KpcXiS z@q0gLq~Pr20dw*&)A4bExtshHQdP3LMi(lR*O0=DSb4ei^Gj&W@l9ONEHyzDSIIVp z<8u&=S(SWJiZJy`@5NUmx#^XQPf2PwbH05@0c-n{qwC^mSb&+ls}YRicP}#=nN%8* zjfhVM*F&_BWZo}0{8F~xLlADx<=$I9wCeln&pIp&q~bc?*{RSJ3agM#_aL_!Qa;}) zYjGbnw_)Pd*NUzPH9iBoeD;%+Re>thB}SPRTc&PS$x0Y@_|C2j&?wA-uq4V}ucm=4 z(aY;FZrW^*c?hVpG-hVf;GqdLaA*%s>?%sHOkgVXuR) wd-)&mDd8{h|M2 + +SPDX-License-Identifier: MPL-2.0 diff --git a/tests/unit/converters/test_tabular_converter.py b/tests/unit/converters/test_tabular_converter.py index c96d8eac..8e0ed031 100644 --- a/tests/unit/converters/test_tabular_converter.py +++ b/tests/unit/converters/test_tabular_converter.py @@ -1532,3 +1532,143 @@ def test_optional_extra__integration(): assert extra_info[node_0_id]["ID"] == "N1" assert extra_info[node_0_id]["Name"] == "Node 1" assert extra_info[node_0_id]["GUID"] == "guid-1" + + +def 
test_optional_extra__with_table_mask(converter: TabularConverter): + """Test optional_extra works correctly with table filtering/masking""" + # Arrange + data = TabularData( + test_table=pd.DataFrame( + { + "id": [1, 2, 3, 4], + "value": [10, 20, 30, 40], + "guid": ["g1", "g2", "g3", "g4"], + "name": ["n1", "n2", "n3", "n4"], + } + ) + ) + # Create a mask that filters to only rows 0 and 2 + table_mask = np.array([True, False, True, False]) + col_def = {"optional_extra": ["guid", "station"]} # 'station' is missing + + # Act + result = converter._parse_col_def( + data=data, table="test_table", col_def=col_def, table_mask=table_mask, extra_info=None, allow_missing=False + ) + + # Assert - should only have 2 rows (from the mask) and 1 column (guid) + assert len(result) == 2 + assert list(result.columns) == ["guid"] + assert list(result["guid"]) == ["g1", "g3"] + + +def test_optional_extra__nested_in_list(converter: TabularConverter): + """Test optional_extra can be nested within a regular list of columns""" + # Arrange + data = TabularData( + test_table=pd.DataFrame( + {"id": [1, 2], "name": ["n1", "n2"], "value": [100, 200], "guid": ["g1", "g2"]} # station missing + ) + ) + col_def = ["name", "value", {"optional_extra": ["guid", "station"]}] + + # Act + result = converter._parse_col_def( + data=data, table="test_table", col_def=col_def, table_mask=None, extra_info=None, allow_missing=False + ) + + # Assert + assert list(result.columns) == ["name", "value", "guid"] + assert list(result["name"]) == ["n1", "n2"] + assert list(result["value"]) == [100, 200] + assert list(result["guid"]) == ["g1", "g2"] + + +def test_optional_extra__with_pipe_separated_columns(converter: TabularConverter): + """Test optional_extra with pipe-separated alternative column names""" + # Arrange + data = TabularData(test_table=pd.DataFrame({"id": [1, 2], "GUID": ["g1", "g2"], "name": ["n1", "n2"]})) + # Use pipe separator for alternative column names (GUID or Guid) + col_def = {"optional_extra": ["GUID|Guid", "StationID|Station"]} # Both StationID and Station missing + + # Act + result = converter._parse_col_def( + data=data, table="test_table", col_def=col_def, table_mask=None, extra_info=None, allow_missing=False + ) + + # Assert - GUID should be found, Station alternatives should be skipped + assert list(result.columns) == ["GUID"] + assert list(result["GUID"]) == ["g1", "g2"] + + +def test_optional_extra__empty_string_values(converter: TabularConverter): + """Test that optional_extra handles empty strings correctly""" + # Arrange + data = TabularData(test_table=pd.DataFrame({"id": [1, 2, 3], "guid": ["g1", "", "g3"], "name": ["n1", "n2", ""]})) + uuids = np.array([100, 200, 300]) + extra_info: ExtraInfo = {} + col_def = {"optional_extra": ["guid", "name"]} + + # Act + converter._handle_extra_info( + data=data, table="test_table", col_def=col_def, uuids=uuids, table_mask=None, extra_info=extra_info + ) + + # Assert - empty strings should still be included (not filtered as NaN) + assert 100 in extra_info + assert 200 in extra_info + assert 300 in extra_info + assert extra_info[100]["guid"] == "g1" + assert extra_info[100]["name"] == "n1" + assert extra_info[200]["guid"] == "" # Empty string preserved + assert extra_info[200]["name"] == "n2" + assert extra_info[300]["guid"] == "g3" + assert extra_info[300]["name"] == "" # Empty string preserved + + +def test_optional_extra__with_nan_values(converter: TabularConverter): + """Test that optional_extra filters out NaN values correctly""" + # Arrange + data = TabularData( + 
test_table=pd.DataFrame({"id": [1, 2, 3], "guid": ["g1", np.nan, "g3"], "value": [10.0, 20.0, np.nan]}) + ) + uuids = np.array([100, 200, 300]) + extra_info: ExtraInfo = {} + col_def = {"optional_extra": ["guid", "value"]} + + # Act + converter._handle_extra_info( + data=data, table="test_table", col_def=col_def, uuids=uuids, table_mask=None, extra_info=extra_info + ) + + # Assert - NaN values should be filtered out + assert 100 in extra_info + assert extra_info[100] == {"guid": "g1", "value": 10.0} + + assert 200 in extra_info + assert extra_info[200] == {"value": 20.0} # guid was NaN, filtered out + + assert 300 in extra_info + assert extra_info[300] == {"guid": "g3"} # value was NaN, filtered out + + +def test_optional_extra__multiple_optional_extra_sections(): + """Test behavior when multiple optional_extra sections are used (should work independently)""" + # Arrange + converter = TabularConverter(mapping_file=MAPPING_FILE) + data = TabularData( + test_table=pd.DataFrame( + {"id": [1, 2], "name": ["n1", "n2"], "guid": ["g1", "g2"]} # station and zone missing + ) + ) + # Two separate optional_extra sections + col_def = [{"optional_extra": ["guid"]}, {"optional_extra": ["station", "zone"]}] + + # Act + result = converter._parse_col_def( + data=data, table="test_table", col_def=col_def, table_mask=None, extra_info=None, allow_missing=False + ) + + # Assert - only guid should be present + assert list(result.columns) == ["guid"] + assert list(result["guid"]) == ["g1", "g2"] diff --git a/tests/unit/converters/test_vision_excel_converter.py b/tests/unit/converters/test_vision_excel_converter.py index 99bcfaec..a783c401 100644 --- a/tests/unit/converters/test_vision_excel_converter.py +++ b/tests/unit/converters/test_vision_excel_converter.py @@ -147,3 +147,105 @@ def test_ambiguity_in_vision_excel(): excel_file_checker = ExcelAmbiguityChecker(file_path=ambiguious_test_file.as_posix()) res, _ = excel_file_checker.check_ambiguity() assert res + + +def test_optional_extra_all_columns_present(): + """Test Vision Excel conversion with all optional columns present""" + # Arrange + test_file = Path(__file__).parents[2] / "data" / "vision" / "vision_optional_extra_full.xlsx" + mapping_file = Path(__file__).parents[2] / "data" / "vision" / "vision_optional_extra_mapping.yaml" + + from power_grid_model import ComponentType + + converter = VisionExcelConverter(source_file=test_file, mapping_file=mapping_file) + + # Act + result, extra_info = converter.load_input_data() + + # Assert + assert ComponentType.node in result + assert len(result[ComponentType.node]) == 3 + + # Check that all extra fields are present (including optional ones) + for node in result[ComponentType.node]: + node_id = node["id"] + assert node_id in extra_info + assert "ID" in extra_info[node_id] + assert "Name" in extra_info[node_id] + assert "GUID" in extra_info[node_id] # Optional but present + assert "StationID" in extra_info[node_id] # Optional but present + + # Verify specific values + node_0_id = result[ComponentType.node][0]["id"] + assert extra_info[node_0_id]["ID"] == "N001" + assert extra_info[node_0_id]["Name"] == "Node1" + assert extra_info[node_0_id]["GUID"] == "guid-001" + assert extra_info[node_0_id]["StationID"] == "ST1" + + +def test_optional_extra_some_columns_missing(): + """Test Vision Excel conversion with some optional columns missing""" + # Arrange + test_file = Path(__file__).parents[2] / "data" / "vision" / "vision_optional_extra_partial.xlsx" + mapping_file = Path(__file__).parents[2] / "data" / "vision" / 
"vision_optional_extra_mapping.yaml" + + from power_grid_model import ComponentType + + converter = VisionExcelConverter(source_file=test_file, mapping_file=mapping_file) + + # Act + result, extra_info = converter.load_input_data() + + # Assert + assert ComponentType.node in result + assert len(result[ComponentType.node]) == 3 + + # Check that required and present optional fields are included + for node in result[ComponentType.node]: + node_id = node["id"] + assert node_id in extra_info + assert "ID" in extra_info[node_id] + assert "Name" in extra_info[node_id] + assert "GUID" in extra_info[node_id] # Optional and present + assert "StationID" not in extra_info[node_id] # Optional and missing - should not be present + + # Verify specific values + node_1_id = result[ComponentType.node][1]["id"] + assert extra_info[node_1_id]["ID"] == "N002" + assert extra_info[node_1_id]["Name"] == "Node2" + assert extra_info[node_1_id]["GUID"] == "guid-002" + + +def test_optional_extra_all_optional_missing(): + """Test Vision Excel conversion with all optional columns missing""" + # Arrange + test_file = Path(__file__).parents[2] / "data" / "vision" / "vision_optional_extra_minimal.xlsx" + mapping_file = Path(__file__).parents[2] / "data" / "vision" / "vision_optional_extra_mapping.yaml" + + from power_grid_model import ComponentType + + converter = VisionExcelConverter(source_file=test_file, mapping_file=mapping_file) + + # Act + result, extra_info = converter.load_input_data() + + # Assert + assert ComponentType.node in result + assert len(result[ComponentType.node]) == 3 + + # Check that only required fields are present + for node in result[ComponentType.node]: + node_id = node["id"] + assert node_id in extra_info + assert "ID" in extra_info[node_id] + assert "Name" in extra_info[node_id] + assert "GUID" not in extra_info[node_id] # Optional and missing + assert "StationID" not in extra_info[node_id] # Optional and missing + + # Verify specific values + node_2_id = result[ComponentType.node][2]["id"] + assert extra_info[node_2_id]["ID"] == "N003" + assert extra_info[node_2_id]["Name"] == "Node3" + # Check that optional fields are not present (only ID, Name, and id_reference) + assert "GUID" not in extra_info[node_2_id] + assert "StationID" not in extra_info[node_2_id] From d357c8a60fb34cfa5b6aaaf29ca41e7bf59d6f2a Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Thu, 27 Nov 2025 14:35:40 +0100 Subject: [PATCH 05/11] append unit test for coverage Signed-off-by: Jerry Guo --- .../unit/converters/test_tabular_converter.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/unit/converters/test_tabular_converter.py b/tests/unit/converters/test_tabular_converter.py index 8e0ed031..fcab9777 100644 --- a/tests/unit/converters/test_tabular_converter.py +++ b/tests/unit/converters/test_tabular_converter.py @@ -1672,3 +1672,27 @@ def test_optional_extra__multiple_optional_extra_sections(): # Assert - only guid should be present assert list(result.columns) == ["guid"] assert list(result["guid"]) == ["g1", "g2"] + + +def test_convert_col_def_to_attribute__pgm_data_without_dtype_names(): + """Test error handling when pgm_data has no dtype.names (unusual edge case)""" + # Arrange + converter = TabularConverter(mapping_file=MAPPING_FILE) + data = TabularData(test_table=pd.DataFrame({"id": [1, 2], "name": ["n1", "n2"]})) + + # Create a mock array without dtype.names by using a plain ndarray + pgm_data = np.array([1, 2]) # Regular array without structured dtype + assert pgm_data.dtype.names is None 
+ + # Act & Assert + with pytest.raises(ValueError, match="pgm_data for 'nodes' has no attributes defined"): + converter._convert_col_def_to_attribute( + data=data, + pgm_data=pgm_data, + table="test_table", + component="node", + attr="id", + col_def="id", + table_mask=None, + extra_info=None, + ) From ef1ce320c0d597472d419fe62528c451d107ec65 Mon Sep 17 00:00:00 2001 From: Jerry Guo <6221579+Jerry-Jinfeng-Guo@users.noreply.github.com> Date: Thu, 27 Nov 2025 14:36:09 +0100 Subject: [PATCH 06/11] Update src/power_grid_model_io/converters/tabular_converter.py Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Jerry Guo <6221579+Jerry-Jinfeng-Guo@users.noreply.github.com> --- src/power_grid_model_io/converters/tabular_converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/power_grid_model_io/converters/tabular_converter.py b/src/power_grid_model_io/converters/tabular_converter.py index 373db46f..8e8241a3 100644 --- a/src/power_grid_model_io/converters/tabular_converter.py +++ b/src/power_grid_model_io/converters/tabular_converter.py @@ -837,7 +837,7 @@ def _parse_col_def_composite( if table_mask is not None: table_data = table_data[table_mask] n_rows = len(table_data) - return pd.DataFrame(index=range(n_rows)) + return pd.DataFrame(index=table_data.index) return pd.concat(non_empty_columns, axis=1) def _get_id(self, table: str, key: Mapping[str, int], name: Optional[str]) -> int: From 826b26668b9fd8a1aefd8458416272d0391f45c0 Mon Sep 17 00:00:00 2001 From: Jerry Guo <6221579+Jerry-Jinfeng-Guo@users.noreply.github.com> Date: Thu, 27 Nov 2025 14:36:17 +0100 Subject: [PATCH 07/11] Update src/power_grid_model_io/converters/tabular_converter.py Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Jerry Guo <6221579+Jerry-Jinfeng-Guo@users.noreply.github.com> --- src/power_grid_model_io/converters/tabular_converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/power_grid_model_io/converters/tabular_converter.py b/src/power_grid_model_io/converters/tabular_converter.py index 8e8241a3..47f5f73a 100644 --- a/src/power_grid_model_io/converters/tabular_converter.py +++ b/src/power_grid_model_io/converters/tabular_converter.py @@ -511,7 +511,7 @@ def _parse_col_def_column_name( columns=" or ".join(f"'{col_name}'" for col_name in columns), ) n_rows = len(table_data) - return pd.DataFrame(index=range(n_rows)) + return pd.DataFrame(index=table_data.index) columns_str = " and ".join(f"'{col_name}'" for col_name in columns) raise KeyError(f"Could not find column {columns_str} on table '{table}'") From a9bf00e425a59efe27e71b15359207cd787d3140 Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Thu, 27 Nov 2025 14:38:14 +0100 Subject: [PATCH 08/11] remove unused variable Signed-off-by: Jerry Guo --- src/power_grid_model_io/converters/tabular_converter.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/power_grid_model_io/converters/tabular_converter.py b/src/power_grid_model_io/converters/tabular_converter.py index 47f5f73a..e7221e55 100644 --- a/src/power_grid_model_io/converters/tabular_converter.py +++ b/src/power_grid_model_io/converters/tabular_converter.py @@ -510,7 +510,6 @@ def _parse_col_def_column_name( table=table, columns=" or ".join(f"'{col_name}'" for col_name in columns), ) - n_rows = len(table_data) return pd.DataFrame(index=table_data.index) columns_str = " and ".join(f"'{col_name}'" for col_name in columns) raise KeyError(f"Could not find column {columns_str} 
on table '{table}'") @@ -836,7 +835,6 @@ def _parse_col_def_composite( table_data = data[table] if table_mask is not None: table_data = table_data[table_mask] - n_rows = len(table_data) return pd.DataFrame(index=table_data.index) return pd.concat(non_empty_columns, axis=1) From c9b38166b0f421706a2091f3a79d09cea640f3cb Mon Sep 17 00:00:00 2001 From: Jerry Guo <6221579+Jerry-Jinfeng-Guo@users.noreply.github.com> Date: Mon, 1 Dec 2025 21:09:18 +0100 Subject: [PATCH 09/11] Update src/power_grid_model_io/converters/tabular_converter.py Comments from Martijn Co-authored-by: Martijn Govers Signed-off-by: Jerry Guo <6221579+Jerry-Jinfeng-Guo@users.noreply.github.com> --- src/power_grid_model_io/converters/tabular_converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/power_grid_model_io/converters/tabular_converter.py b/src/power_grid_model_io/converters/tabular_converter.py index e7221e55..c4dc7e0b 100644 --- a/src/power_grid_model_io/converters/tabular_converter.py +++ b/src/power_grid_model_io/converters/tabular_converter.py @@ -512,7 +512,7 @@ def _parse_col_def_column_name( ) return pd.DataFrame(index=table_data.index) columns_str = " and ".join(f"'{col_name}'" for col_name in columns) - raise KeyError(f"Could not find column {columns_str} on table '{table}'") + raise KeyError(f"Could not find column {columns_str} on table '{table}'") from e return self._parse_col_def_const(data=data, table=table, col_def=const_value, table_mask=table_mask) From 1e4b625adfadf772d30f8bda030c74fac3ab73eb Mon Sep 17 00:00:00 2001 From: Jerry Guo <6221579+Jerry-Jinfeng-Guo@users.noreply.github.com> Date: Mon, 1 Dec 2025 21:09:28 +0100 Subject: [PATCH 10/11] Update src/power_grid_model_io/converters/tabular_converter.py Comments from Martijn Co-authored-by: Martijn Govers Signed-off-by: Jerry Guo <6221579+Jerry-Jinfeng-Guo@users.noreply.github.com> --- src/power_grid_model_io/converters/tabular_converter.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/power_grid_model_io/converters/tabular_converter.py b/src/power_grid_model_io/converters/tabular_converter.py index c4dc7e0b..90ec674c 100644 --- a/src/power_grid_model_io/converters/tabular_converter.py +++ b/src/power_grid_model_io/converters/tabular_converter.py @@ -501,8 +501,7 @@ def _parse_col_def_column_name( try: # Maybe it is not a column name, but a float value like 'inf', let's try to convert the string to a float const_value = float(col_def) - except ValueError: - # pylint: disable=raise-missing-from + except ValueError as e: if allow_missing: # Return empty DataFrame with correct number of rows when column is optional and missing self._log.debug( From 2f84ac9b5d0ad5582a01391250e973148aa46aa8 Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Mon, 1 Dec 2025 21:28:44 +0100 Subject: [PATCH 11/11] made kwarg; add specfic tests Signed-off-by: Jerry Guo --- docs/converters/vision_converter.md | 15 + .../converters/tabular_converter.py | 57 +++- .../unit/converters/test_tabular_converter.py | 323 ++++++++++++++++++ 3 files changed, 393 insertions(+), 2 deletions(-) diff --git a/docs/converters/vision_converter.md b/docs/converters/vision_converter.md index 9d53ff35..e845e2b9 100644 --- a/docs/converters/vision_converter.md +++ b/docs/converters/vision_converter.md @@ -79,6 +79,21 @@ grid: - If some optional columns are present and others missing, only the present ones are included in `extra_info` - This feature is particularly useful for handling different Vision export configurations or versions +**Duplicate 
handling:** +When a column appears in both the regular `extra` list and within `optional_extra`, the regular `extra` entry takes precedence and duplicates are automatically eliminated from `optional_extra`: + +```yaml +extra: + - ID # Regular column - always processed + - Name # Regular column - always processed + - optional_extra: + - ID # Duplicate - automatically removed + - GUID # Unique optional - processed if present + - StationID # Unique optional - processed if present +``` + +In this example, `ID` will only be processed once (from the regular `extra` list), while `GUID` and `StationID` are processed as optional columns. This prevents duplicate data in the resulting `extra_info` and ensures consistent behavior regardless of column ordering. + ## Common/Known issues related to Vision So far we have the following issue known to us related to Vision exported spread sheets. We provide a solution from user perspective to the best of our knowledge. diff --git a/src/power_grid_model_io/converters/tabular_converter.py b/src/power_grid_model_io/converters/tabular_converter.py index 90ec674c..55b83bad 100644 --- a/src/power_grid_model_io/converters/tabular_converter.py +++ b/src/power_grid_model_io/converters/tabular_converter.py @@ -337,11 +337,14 @@ def _handle_extra_info( # pylint: disable = too-many-arguments,too-many-positio if extra_info is None: return + # Normalize col_def to handle deduplication when optional_extra contains columns also in regular extra + normalized_col_def = self._normalize_extra_col_def(col_def) + extra = self._parse_col_def( data=data, table=table, table_mask=table_mask, - col_def=col_def, + col_def=normalized_col_def, extra_info=None, ).to_dict(orient="records") for i, xtr in zip(uuids, extra): @@ -356,6 +359,55 @@ def _handle_extra_info( # pylint: disable = too-many-arguments,too-many-positio else: extra_info[i] = xtr + def _normalize_extra_col_def(self, col_def: Any) -> Any: + """ + Normalize extra column definition to eliminate duplicates between regular columns and optional_extra. + Regular columns take precedence over optional_extra columns. 
+ + Args: + col_def: Column definition for extra info that may contain optional_extra sections + + Returns: + Normalized column definition with duplicates removed from optional_extra + """ + if not isinstance(col_def, list): + return col_def + + # Collect all non-optional_extra column names + regular_columns = set() + normalized_list = [] + + for item in col_def: + if isinstance(item, dict) and len(item) == 1 and "optional_extra" in item: + # This is an optional_extra section - we'll process it later + normalized_list.append(item) + else: + # This is a regular column + if isinstance(item, str): + regular_columns.add(item) + normalized_list.append(item) + + # Now process optional_extra sections and remove duplicates + final_list = [] + for item in normalized_list: + if isinstance(item, dict) and len(item) == 1 and "optional_extra" in item: + optional_cols = item["optional_extra"] + if isinstance(optional_cols, list): + # Filter out columns that are already in regular columns + filtered_optional_cols = [ + col for col in optional_cols if not isinstance(col, str) or col not in regular_columns + ] + # Only include the optional_extra section if it has remaining columns + if filtered_optional_cols: + final_list.append({"optional_extra": filtered_optional_cols}) + else: + # Keep non-list optional_extra as-is (shouldn't happen but be safe) + final_list.append(item) + else: + final_list.append(item) + + return final_list + @staticmethod def _merge_pgm_data(data: Dict[str, List[np.ndarray]]) -> Dict[str, np.ndarray]: """During the conversion, multiple numpy arrays can be produced for the same type of component. These arrays @@ -396,6 +448,7 @@ def _parse_col_def( # pylint: disable = too-many-arguments,too-many-positional- col_def: Any, table_mask: Optional[np.ndarray], extra_info: Optional[ExtraInfo], + *, allow_missing: bool = False, ) -> pd.DataFrame: """Interpret the column definition and extract/convert/create the data as a pandas DataFrame. 
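
The normalization rule above can be illustrated with a plain-Python sketch of the same idea; the column names are illustrative:

```python
# Sketch: regular extra columns win over duplicates listed under optional_extra.
col_def = ["ID", "Name", {"optional_extra": ["ID", "GUID", "StationID"]}]

regular = {item for item in col_def if isinstance(item, str)}
normalized = []
for item in col_def:
    if isinstance(item, dict) and "optional_extra" in item:
        remaining = [col for col in item["optional_extra"] if col not in regular]
        if remaining:  # drop the section entirely if nothing is left
            normalized.append({"optional_extra": remaining})
    else:
        normalized.append(item)

print(normalized)  # ['ID', 'Name', {'optional_extra': ['GUID', 'StationID']}]
```

Dropping a fully duplicated `optional_extra` section, rather than keeping an empty one, mirrors the behavior verified by the normalization tests below.
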
@@ -501,7 +554,7 @@ def _parse_col_def_column_name( try: # Maybe it is not a column name, but a float value like 'inf', let's try to convert the string to a float const_value = float(col_def) - except ValueError as e: + except ValueError as e: if allow_missing: # Return empty DataFrame with correct number of rows when column is optional and missing self._log.debug( diff --git a/tests/unit/converters/test_tabular_converter.py b/tests/unit/converters/test_tabular_converter.py index fcab9777..c475e61a 100644 --- a/tests/unit/converters/test_tabular_converter.py +++ b/tests/unit/converters/test_tabular_converter.py @@ -1696,3 +1696,326 @@ def test_convert_col_def_to_attribute__pgm_data_without_dtype_names(): table_mask=None, extra_info=None, ) + + +def test_parse_col_def_with_allow_missing(): + """Test _parse_col_def function with allow_missing parameter both True and False""" + # Arrange + converter = TabularConverter(mapping_file=MAPPING_FILE) + data = TabularData(test_table=pd.DataFrame({"existing_col": [1, 2, 3], "another_col": ["a", "b", "c"]})) + + # Test 1: String column with allow_missing=False (default) - existing column + result = converter._parse_col_def( + data=data, + table="test_table", + col_def="existing_col", + table_mask=None, + extra_info=None, + allow_missing=False, + ) + assert list(result.iloc[:, 0]) == [1, 2, 3] + + # Test 2: String column with allow_missing=False - missing column (should raise KeyError) + with pytest.raises(KeyError, match="Could not find column 'missing_col' on table 'test_table'"): + converter._parse_col_def( + data=data, + table="test_table", + col_def="missing_col", + table_mask=None, + extra_info=None, + allow_missing=False, + ) + + # Test 3: String column with allow_missing=True - missing column (should return empty DataFrame) + result = converter._parse_col_def( + data=data, + table="test_table", + col_def="missing_col", + table_mask=None, + extra_info=None, + allow_missing=True, + ) + assert len(result.columns) == 0 + assert len(result) == 3 # Should have same number of rows as original table + + # Test 4: String column with allow_missing=True - existing column (should work normally) + result = converter._parse_col_def( + data=data, + table="test_table", + col_def="existing_col", + table_mask=None, + extra_info=None, + allow_missing=True, + ) + assert list(result.iloc[:, 0]) == [1, 2, 3] + + # Test 5: List (composite) with allow_missing=False - all existing columns + result = converter._parse_col_def( + data=data, + table="test_table", + col_def=["existing_col", "another_col"], + table_mask=None, + extra_info=None, + allow_missing=False, + ) + assert len(result.columns) == 2 + assert list(result["existing_col"]) == [1, 2, 3] + assert list(result["another_col"]) == ["a", "b", "c"] + + # Test 6: List (composite) with allow_missing=False - some missing columns (should raise error) + with pytest.raises(KeyError, match="Could not find column 'missing_col' on table 'test_table'"): + converter._parse_col_def( + data=data, + table="test_table", + col_def=["existing_col", "missing_col"], + table_mask=None, + extra_info=None, + allow_missing=False, + ) + + # Test 7: List (composite) with allow_missing=True - some missing columns (should skip missing) + result = converter._parse_col_def( + data=data, + table="test_table", + col_def=["existing_col", "missing_col", "another_col"], + table_mask=None, + extra_info=None, + allow_missing=True, + ) + assert len(result.columns) == 2 # Only existing columns should be present + assert list(result["existing_col"]) == 
+    assert list(result["another_col"]) == ["a", "b", "c"]
+
+    # Test 8: List (composite) with allow_missing=True - all missing columns (should return empty with correct rows)
+    result = converter._parse_col_def(
+        data=data,
+        table="test_table",
+        col_def=["missing_col1", "missing_col2"],
+        table_mask=None,
+        extra_info=None,
+        allow_missing=True,
+    )
+    assert len(result.columns) == 0
+    assert len(result) == 3  # Should have same number of rows as original table
+
+    # Test 9: Dict (optional_extra) - should automatically set allow_missing=True internally
+    result = converter._parse_col_def(
+        data=data,
+        table="test_table",
+        col_def={"optional_extra": ["existing_col", "missing_col"]},
+        table_mask=None,
+        extra_info=None,
+        allow_missing=False,  # This should be ignored for optional_extra
+    )
+    assert len(result.columns) == 1  # Only existing column should be present
+    assert list(result["existing_col"]) == [1, 2, 3]
+
+    # Test 10: Constant values should work regardless of allow_missing
+    result_false = converter._parse_col_def(
+        data=data,
+        table="test_table",
+        col_def=42,
+        table_mask=None,
+        extra_info=None,
+        allow_missing=False,
+    )
+    result_true = converter._parse_col_def(
+        data=data,
+        table="test_table",
+        col_def=42,
+        table_mask=None,
+        extra_info=None,
+        allow_missing=True,
+    )
+    assert list(result_false.iloc[:, 0]) == [42, 42, 42]
+    assert list(result_true.iloc[:, 0]) == [42, 42, 42]
+
+
+def test_parse_col_def_with_allow_missing_and_table_mask():
+    """Test _parse_col_def function with allow_missing and table_mask combinations"""
+    # Arrange
+    converter = TabularConverter(mapping_file=MAPPING_FILE)
+    data = TabularData(test_table=pd.DataFrame({"existing_col": [1, 2, 3, 4], "another_col": ["a", "b", "c", "d"]}))
+    table_mask = np.array([True, False, True, False])  # Select rows 0 and 2
+
+    # Test 1: Missing column with table_mask and allow_missing=True
+    result = converter._parse_col_def(
+        data=data,
+        table="test_table",
+        col_def="missing_col",
+        table_mask=table_mask,
+        extra_info=None,
+        allow_missing=True,
+    )
+    assert len(result.columns) == 0
+    assert len(result) == 2  # Should match filtered table length
+
+    # Test 2: Existing column with table_mask and allow_missing=True
+    result = converter._parse_col_def(
+        data=data,
+        table="test_table",
+        col_def="existing_col",
+        table_mask=table_mask,
+        extra_info=None,
+        allow_missing=True,
+    )
+    assert list(result.iloc[:, 0]) == [1, 3]  # Should get filtered values
+
+    # Test 3: Composite with missing columns, table_mask, and allow_missing=True
+    result = converter._parse_col_def(
+        data=data,
+        table="test_table",
+        col_def=["existing_col", "missing_col"],
+        table_mask=table_mask,
+        extra_info=None,
+        allow_missing=True,
+    )
+    assert len(result.columns) == 1  # Only existing column
+    assert list(result["existing_col"]) == [1, 3]  # Filtered values
+
+
+def test_normalize_extra_col_def():
+    """Test _normalize_extra_col_def method for handling duplicate columns"""
+    # Arrange
+    converter = TabularConverter(mapping_file=MAPPING_FILE)
+
+    # Test 1: Regular list without optional_extra (should be unchanged)
+    col_def = ["ID", "Name", "GUID"]
+    result = converter._normalize_extra_col_def(col_def)
+    assert result == ["ID", "Name", "GUID"]
+
+    # Test 2: Non-list input (should be unchanged)
+    col_def = "ID"
+    result = converter._normalize_extra_col_def(col_def)
+    assert result == "ID"
+
+    # Test 3: List with optional_extra but no duplicates
+    col_def = ["ID", {"optional_extra": ["GUID", "StationID"]}]
+    result = converter._normalize_extra_col_def(col_def)
+    assert result == ["ID", {"optional_extra": ["GUID", "StationID"]}]
+
+    # Test 4: List with duplicates - regular column should dominate
+    col_def = ["ID", "Name", {"optional_extra": ["ID", "GUID", "StationID"]}]
+    result = converter._normalize_extra_col_def(col_def)
+    expected = ["ID", "Name", {"optional_extra": ["GUID", "StationID"]}]
+    assert result == expected
+
+    # Test 5: Multiple optional_extra sections with overlaps
+    col_def = ["ID", {"optional_extra": ["ID", "GUID"]}, {"optional_extra": ["Name", "StationID"]}]
+    result = converter._normalize_extra_col_def(col_def)
+    expected = ["ID", {"optional_extra": ["GUID"]}, {"optional_extra": ["Name", "StationID"]}]
+    assert result == expected
+
+    # Test 6: All optional columns are duplicates (should remove optional_extra section)
+    col_def = ["ID", "Name", {"optional_extra": ["ID", "Name"]}]
+    result = converter._normalize_extra_col_def(col_def)
+    expected = ["ID", "Name"]
+    assert result == expected
+
+    # Test 7: Empty optional_extra list (should be removed)
+    col_def = ["ID", {"optional_extra": []}]
+    result = converter._normalize_extra_col_def(col_def)
+    expected = ["ID"]
+    assert result == expected
+
+
+def test_handle_extra_info_with_duplicates():
+    """Test that _handle_extra_info correctly handles duplicates between regular and optional columns"""
+    # Arrange
+    converter = TabularConverter(mapping_file=MAPPING_FILE)
+    data = TabularData(
+        test_table=pd.DataFrame(
+            {
+                "ID": ["N001", "N002", "N003"],
+                "Name": ["Node1", "Node2", "Node3"],
+                "GUID": ["g1", "g2", "g3"],
+                "StationID": ["ST1", "ST2", "ST3"],
+            }
+        )
+    )
+
+    # Column definition with duplicates (ID appears in both regular and optional_extra)
+    col_def = ["ID", "Name", {"optional_extra": ["ID", "GUID", "StationID"]}]
+
+    uuids = np.array([100, 200, 300])
+    extra_info = {}
+
+    # Act
+    converter._handle_extra_info(
+        data=data,
+        table="test_table",
+        col_def=col_def,
+        uuids=uuids,
+        table_mask=None,
+        extra_info=extra_info,
+    )
+
+    # Assert
+    # The ID value must come from the regular column; the duplicate in optional_extra must not overwrite it
+    assert 100 in extra_info
+    assert extra_info[100]["ID"] == "N001"
+    assert extra_info[100]["Name"] == "Node1"
+    assert extra_info[100]["GUID"] == "g1"
+    assert extra_info[100]["StationID"] == "ST1"
+
+    # Document that ID ends up as a single entry in extra_info
+    result_keys = list(extra_info[100].keys())
+    assert result_keys.count("ID") == 1, f"ID should appear only once, but got: {result_keys}"
+
+    # Similar checks for other rows
+    assert extra_info[200]["ID"] == "N002"
+    assert extra_info[300]["ID"] == "N003"
+
+
+def test_optional_extra_with_duplicates_integration():
+    """Integration test to verify duplicate elimination works in a full conversion scenario"""
+    # Arrange
+    converter = TabularConverter(mapping_file=MAPPING_FILE)
+
+    # Create test data with columns that will appear in both regular and optional_extra
+    data = TabularData(
+        test_table=pd.DataFrame(
+            {
+                "id": [1, 2, 3],
+                "name": ["Node1", "Node2", "Node3"],
+                "u_nom": [10.0, 10.0, 0.4],
+                "guid": ["g1", "g2", "g3"],
+                "station": ["ST1", "ST2", "ST3"],
+            }
+        )
+    )
+
+    # Column definition that has id in both places - the regular column should dominate
+    col_def = [
+        "id",  # Regular column
+        "name",  # Regular column
+        {"optional_extra": ["id", "guid", "station"]},  # id is a duplicate; guid and station are new
+    ]
+
+    extra_info = {}
+    uuids = np.array([100, 200, 300])
+
+    # Act
+    converter._handle_extra_info(
+        data=data,
+        table="test_table",
+        col_def=col_def,
+        uuids=uuids,
+        table_mask=None,
+        extra_info=extra_info,
+    )
+
+    # Assert - verify no duplicate columns and all expected columns are present
+    for uuid, expected_id in zip([100, 200, 300], [1, 2, 3]):
+        assert uuid in extra_info
+        extra_data = extra_info[uuid]
+
+        # Should have all columns, with id appearing only once
+        expected_keys = {"id", "name", "guid", "station"}
+        assert set(extra_data.keys()) == expected_keys
+
+        # Verify values
+        assert extra_data["id"] == expected_id
+        assert extra_data["name"] == f"Node{expected_id}"
+        assert extra_data["guid"] == f"g{expected_id}"
+        assert extra_data["station"] == f"ST{expected_id}"
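
The hunk above adds only the tests for `_normalize_extra_col_def`; the method body lands elsewhere in this series. As a reading aid, here is a minimal standalone sketch that satisfies the expectations pinned down by `test_normalize_extra_col_def` (the free-function form and all internal names are illustrative, not the patch's actual code):

```python
from typing import Any


def _normalize_extra_col_def(col_def: Any) -> Any:
    """Drop optional columns that are already requested as required columns.

    Required (plain string) entries dominate: a column listed both directly
    and inside an ``optional_extra`` dict is removed from the optional list,
    and an ``optional_extra`` dict that ends up empty is dropped entirely.
    Non-list definitions pass through unchanged.
    """
    if not isinstance(col_def, list):
        return col_def

    # Collect the required column names (top-level strings) first
    required = {entry for entry in col_def if isinstance(entry, str)}

    normalized: list = []
    for entry in col_def:
        if isinstance(entry, dict) and set(entry) == {"optional_extra"}:
            remaining = [col for col in entry["optional_extra"] if col not in required]
            if remaining:  # drop the section entirely when nothing is left
                normalized.append({"optional_extra": remaining})
        else:
            normalized.append(entry)
    return normalized


# Mirrors Test 4 above: the required "ID" wins over its optional duplicate
assert _normalize_extra_col_def(
    ["ID", "Name", {"optional_extra": ["ID", "GUID", "StationID"]}]
) == ["ID", "Name", {"optional_extra": ["GUID", "StationID"]}]
```

Note that the sketch deduplicates only against top-level strings, which is why Test 5 keeps "Name" inside the second `optional_extra` section: overlaps between two optional sections are left alone.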