-
Notifications
You must be signed in to change notification settings - Fork 12
Logic to handle optional_extra in Vision Excel converter input #344
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
60011ca
7f00482
5d78037
e375135
d357c8a
ef1ce32
826b266
a9bf00e
c9b3816
1e4b625
2f84ac9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -337,11 +337,14 @@ | |||||||
| if extra_info is None: | ||||||||
| return | ||||||||
|
|
||||||||
| # Normalize col_def to handle deduplication when optional_extra contains columns also in regular extra | ||||||||
| normalized_col_def = self._normalize_extra_col_def(col_def) | ||||||||
|
|
||||||||
| extra = self._parse_col_def( | ||||||||
| data=data, | ||||||||
| table=table, | ||||||||
| table_mask=table_mask, | ||||||||
| col_def=col_def, | ||||||||
| col_def=normalized_col_def, | ||||||||
| extra_info=None, | ||||||||
| ).to_dict(orient="records") | ||||||||
| for i, xtr in zip(uuids, extra): | ||||||||
|
|
@@ -356,6 +359,55 @@ | |||||||
| else: | ||||||||
| extra_info[i] = xtr | ||||||||
|
|
||||||||
| def _normalize_extra_col_def(self, col_def: Any) -> Any: | ||||||||
|
Check failure on line 362 in src/power_grid_model_io/converters/tabular_converter.py
|
||||||||
| """ | ||||||||
| Normalize extra column definition to eliminate duplicates between regular columns and optional_extra. | ||||||||
| Regular columns take precedence over optional_extra columns. | ||||||||
|
|
||||||||
| Args: | ||||||||
| col_def: Column definition for extra info that may contain optional_extra sections | ||||||||
|
|
||||||||
| Returns: | ||||||||
| Normalized column definition with duplicates removed from optional_extra | ||||||||
| """ | ||||||||
| if not isinstance(col_def, list): | ||||||||
| return col_def | ||||||||
|
|
||||||||
| # Collect all non-optional_extra column names | ||||||||
| regular_columns = set() | ||||||||
| normalized_list = [] | ||||||||
|
|
||||||||
| for item in col_def: | ||||||||
| if isinstance(item, dict) and len(item) == 1 and "optional_extra" in item: | ||||||||
| # This is an optional_extra section - we'll process it later | ||||||||
| normalized_list.append(item) | ||||||||
| else: | ||||||||
| # This is a regular column | ||||||||
| if isinstance(item, str): | ||||||||
| regular_columns.add(item) | ||||||||
| normalized_list.append(item) | ||||||||
|
|
||||||||
| # Now process optional_extra sections and remove duplicates | ||||||||
| final_list = [] | ||||||||
| for item in normalized_list: | ||||||||
| if isinstance(item, dict) and len(item) == 1 and "optional_extra" in item: | ||||||||
| optional_cols = item["optional_extra"] | ||||||||
| if isinstance(optional_cols, list): | ||||||||
| # Filter out columns that are already in regular columns | ||||||||
| filtered_optional_cols = [ | ||||||||
| col for col in optional_cols if not isinstance(col, str) or col not in regular_columns | ||||||||
| ] | ||||||||
| # Only include the optional_extra section if it has remaining columns | ||||||||
| if filtered_optional_cols: | ||||||||
| final_list.append({"optional_extra": filtered_optional_cols}) | ||||||||
| else: | ||||||||
| # Keep non-list optional_extra as-is (shouldn't happen but be safe) | ||||||||
| final_list.append(item) | ||||||||
| else: | ||||||||
| final_list.append(item) | ||||||||
|
|
||||||||
| return final_list | ||||||||
|
|
||||||||
| @staticmethod | ||||||||
| def _merge_pgm_data(data: Dict[str, List[np.ndarray]]) -> Dict[str, np.ndarray]: | ||||||||
| """During the conversion, multiple numpy arrays can be produced for the same type of component. These arrays | ||||||||
|
|
@@ -396,6 +448,8 @@ | |||||||
| col_def: Any, | ||||||||
| table_mask: Optional[np.ndarray], | ||||||||
| extra_info: Optional[ExtraInfo], | ||||||||
| *, | ||||||||
| allow_missing: bool = False, | ||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please make a keyword arg
Suggested change
optionally, you can even demand that the other fields are keyword args as well (that's OK because it's a protected member function so we can change the interface without any issues)
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If it's a private function and we can change it at will. Does it really matter if we have
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the reason is that it's very bugprone unless we thoroughly test this
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||||
| ) -> pd.DataFrame: | ||||||||
| """Interpret the column definition and extract/convert/create the data as a pandas DataFrame. | ||||||||
|
|
||||||||
|
|
@@ -404,15 +458,27 @@ | |||||||
| table: str: | ||||||||
| col_def: Any: | ||||||||
| extra_info: Optional[ExtraInfo]: | ||||||||
| allow_missing: bool: If True, missing columns will return empty DataFrame instead of raising KeyError | ||||||||
|
|
||||||||
| Returns: | ||||||||
|
|
||||||||
| """ | ||||||||
| if isinstance(col_def, (int, float)): | ||||||||
| return self._parse_col_def_const(data=data, table=table, col_def=col_def, table_mask=table_mask) | ||||||||
| if isinstance(col_def, str): | ||||||||
| return self._parse_col_def_column_name(data=data, table=table, col_def=col_def, table_mask=table_mask) | ||||||||
| return self._parse_col_def_column_name( | ||||||||
| data=data, table=table, col_def=col_def, table_mask=table_mask, allow_missing=allow_missing | ||||||||
| ) | ||||||||
| if isinstance(col_def, dict): | ||||||||
| # Check if this is an optional_extra wrapper | ||||||||
| if len(col_def) == 1 and "optional_extra" in col_def: | ||||||||
| # Extract the list of optional columns and parse as composite with allow_missing=True | ||||||||
| optional_cols = col_def["optional_extra"] | ||||||||
| if not isinstance(optional_cols, list): | ||||||||
| raise TypeError(f"optional_extra value must be a list, got {type(optional_cols).__name__}") | ||||||||
| return self._parse_col_def_composite( | ||||||||
| data=data, table=table, col_def=optional_cols, table_mask=table_mask, allow_missing=True | ||||||||
| ) | ||||||||
|
Comment on lines
+479
to
+481
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the only place where I see I don't think I've ever worked in this repo, so genuinely asking. I'd appreciate tech review over this :)
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For now only |
||||||||
| return self._parse_col_def_filter( | ||||||||
| data=data, | ||||||||
| table=table, | ||||||||
|
|
@@ -421,7 +487,9 @@ | |||||||
| extra_info=extra_info, | ||||||||
| ) | ||||||||
| if isinstance(col_def, list): | ||||||||
| return self._parse_col_def_composite(data=data, table=table, col_def=col_def, table_mask=table_mask) | ||||||||
| return self._parse_col_def_composite( | ||||||||
| data=data, table=table, col_def=col_def, table_mask=table_mask, allow_missing=allow_missing | ||||||||
| ) | ||||||||
| raise TypeError(f"Invalid column definition: {col_def}") | ||||||||
|
|
||||||||
| @staticmethod | ||||||||
|
|
@@ -454,6 +522,7 @@ | |||||||
| table: str, | ||||||||
| col_def: str, | ||||||||
| table_mask: Optional[np.ndarray] = None, | ||||||||
| allow_missing: bool = False, | ||||||||
| ) -> pd.DataFrame: | ||||||||
| """Extract a column from the data. If the column doesn't exist, check if the col_def is a special float value, | ||||||||
| like 'inf'. If that's the case, create a single column pandas DataFrame containing the const value. | ||||||||
|
|
@@ -462,6 +531,7 @@ | |||||||
| data: TabularData: | ||||||||
| table: str: | ||||||||
| col_def: str: | ||||||||
| allow_missing: bool: If True, return empty DataFrame when column is missing instead of raising KeyError | ||||||||
|
|
||||||||
| Returns: | ||||||||
|
|
||||||||
|
|
@@ -484,10 +554,17 @@ | |||||||
|
|
||||||||
| try: # Maybe it is not a column name, but a float value like 'inf', let's try to convert the string to a float | ||||||||
| const_value = float(col_def) | ||||||||
| except ValueError: | ||||||||
| # pylint: disable=raise-missing-from | ||||||||
| except ValueError as e: | ||||||||
| if allow_missing: | ||||||||
| # Return empty DataFrame with correct number of rows when column is optional and missing | ||||||||
| self._log.debug( | ||||||||
| "Optional column not found", | ||||||||
| table=table, | ||||||||
| columns=" or ".join(f"'{col_name}'" for col_name in columns), | ||||||||
| ) | ||||||||
| return pd.DataFrame(index=table_data.index) | ||||||||
| columns_str = " and ".join(f"'{col_name}'" for col_name in columns) | ||||||||
| raise KeyError(f"Could not find column {columns_str} on table '{table}'") | ||||||||
| raise KeyError(f"Could not find column {columns_str} on table '{table}'") from e | ||||||||
|
|
||||||||
| return self._parse_col_def_const(data=data, table=table, col_def=const_value, table_mask=table_mask) | ||||||||
|
|
||||||||
|
|
@@ -778,13 +855,15 @@ | |||||||
| table: str, | ||||||||
| col_def: list, | ||||||||
| table_mask: Optional[np.ndarray], | ||||||||
| allow_missing: bool = False, | ||||||||
| ) -> pd.DataFrame: | ||||||||
| """Select multiple columns (each is created from a column definition) and return them as a new DataFrame. | ||||||||
|
|
||||||||
| Args: | ||||||||
| data: TabularData: | ||||||||
| table: str: | ||||||||
| col_def: list: | ||||||||
| allow_missing: bool: If True, skip missing columns instead of raising errors | ||||||||
|
|
||||||||
| Returns: | ||||||||
|
|
||||||||
|
|
@@ -797,10 +876,19 @@ | |||||||
| col_def=sub_def, | ||||||||
| table_mask=table_mask, | ||||||||
| extra_info=None, | ||||||||
| allow_missing=allow_missing, | ||||||||
| ) | ||||||||
| for sub_def in col_def | ||||||||
| ] | ||||||||
| return pd.concat(columns, axis=1) | ||||||||
| # Filter out DataFrames with no columns (from missing optional columns) | ||||||||
| non_empty_columns = [col for col in columns if len(col.columns) > 0] | ||||||||
| if not non_empty_columns: | ||||||||
| # If all columns are missing, return an empty DataFrame with the correct number of rows | ||||||||
| table_data = data[table] | ||||||||
| if table_mask is not None: | ||||||||
| table_data = table_data[table_mask] | ||||||||
| return pd.DataFrame(index=table_data.index) | ||||||||
| return pd.concat(non_empty_columns, axis=1) | ||||||||
|
|
||||||||
| def _get_id(self, table: str, key: Mapping[str, int], name: Optional[str]) -> int: | ||||||||
| """ | ||||||||
|
|
||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org> | ||
| # | ||
| # SPDX-License-Identifier: MPL-2.0 | ||
| --- | ||
| # Test mapping file for optional_extra feature | ||
| grid: | ||
| nodes: | ||
| node: | ||
| id: | ||
| auto_id: | ||
| key: node_id | ||
| u_rated: voltage | ||
| extra: | ||
| - ID | ||
| - Name | ||
| - optional_extra: | ||
| - GUID | ||
| - StationID | ||
|
|
||
| units: | ||
| V: | ||
| kV: 1000.0 | ||
|
|
||
| substitutions: {} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org> | ||
|
|
||
| SPDX-License-Identifier: MPL-2.0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org> | ||
| # | ||
| # SPDX-License-Identifier: MPL-2.0 | ||
| --- | ||
| # Test mapping file for optional_extra feature with Vision Excel format | ||
| id_reference: | ||
| nodes_table: Nodes | ||
| number: Number | ||
| node_number: Node.Number | ||
| sub_number: Subnumber | ||
|
|
||
| grid: | ||
| Nodes: | ||
| node: | ||
| id: | ||
| auto_id: | ||
| key: Number | ||
| u_rated: Unom | ||
| extra: | ||
| - ID | ||
| - Name | ||
| - optional_extra: | ||
| - GUID | ||
| - StationID | ||
|
|
||
| units: | ||
| V: | ||
| kV: 1000.0 | ||
|
|
||
| substitutions: {} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org> | ||
|
|
||
| SPDX-License-Identifier: MPL-2.0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org> | ||
|
|
||
| SPDX-License-Identifier: MPL-2.0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org> | ||
|
|
||
| SPDX-License-Identifier: MPL-2.0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what happens if an element is specified in both the required and optional?
I believe that the default should be that required precedes optional, so maybe we need to add an explicit test case for this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
2f84ac9