Skip to content

Commit

Permalink
feat: stabilize Column (#981)
Browse files Browse the repository at this point in the history
Closes partially #754
Closes partially #977

### Summary of Changes

- Improve documentation for all methods of `Column`.
- Add the option to specify the column type when calling the
constructor. If omitted, it is still inferred from the data.
  • Loading branch information
lars-reimann authored Jan 14, 2025
1 parent ca1ce3d commit 38dc89c
Show file tree
Hide file tree
Showing 154 changed files with 1,278 additions and 947 deletions.
6 changes: 6 additions & 0 deletions src/safeds/_validation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@

if TYPE_CHECKING:
from ._check_bounds_module import _check_bounds, _ClosedBound, _OpenBound
from ._check_column_has_no_missing_values import _check_column_has_no_missing_values
from ._check_column_is_numeric_module import _check_column_is_numeric, _check_columns_are_numeric
from ._check_columns_dont_exist_module import _check_columns_dont_exist
from ._check_columns_exist_module import _check_columns_exist
from ._check_indices_module import _check_indices
from ._check_row_counts_are_equal_module import _check_row_counts_are_equal
from ._check_schema_module import _check_schema
from ._normalize_and_check_file_path_module import _normalize_and_check_file_path
Expand All @@ -19,10 +21,12 @@
"_check_bounds": "._check_bounds_module:_check_bounds",
"_ClosedBound": "._check_bounds_module:_ClosedBound",
"_OpenBound": "._check_bounds_module:_OpenBound",
"_check_column_has_no_missing_values": "._check_column_has_no_missing_values:_check_column_has_no_missing_values",
"_check_column_is_numeric": "._check_column_is_numeric_module:_check_column_is_numeric",
"_check_columns_are_numeric": "._check_column_is_numeric_module:_check_columns_are_numeric",
"_check_columns_dont_exist": "._check_columns_dont_exist_module:_check_columns_dont_exist",
"_check_columns_exist": "._check_columns_exist_module:_check_columns_exist",
"_check_indices": "._check_indices_module:_check_indices",
"_check_row_counts_are_equal": "._check_row_counts_are_equal_module:_check_row_counts_are_equal",
"_check_schema": "._check_schema_module:_check_schema",
"_normalize_and_check_file_path": "._normalize_and_check_file_path_module:_normalize_and_check_file_path",
Expand All @@ -33,10 +37,12 @@
"_ClosedBound",
"_OpenBound",
"_check_bounds",
"_check_column_has_no_missing_values",
"_check_column_is_numeric",
"_check_columns_are_numeric",
"_check_columns_dont_exist",
"_check_columns_exist",
"_check_indices",
"_check_row_counts_are_equal",
"_check_schema",
"_normalize_and_check_file_path",
Expand Down
46 changes: 46 additions & 0 deletions src/safeds/_validation/_check_column_has_no_missing_values.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from safeds.exceptions import MissingValuesError

if TYPE_CHECKING:
from safeds.data.tabular.containers import Column


def _check_column_has_no_missing_values(
    column: Column,
    *,
    other_columns: list[Column] | None = None,
    operation: str = "do an operation",
) -> None:
    """
    Ensure that none of the given columns contains missing values.

    Parameters
    ----------
    column:
        The column to check.
    other_columns:
        Further columns to check in the same pass. The names of all offending columns are collected into a single
        error message, which is more helpful than checking each column individually.
    operation:
        Description of the attempted operation. It is embedded in the error message.

    Raises
    ------
    MissingValuesError
        If at least one of the checked columns has missing values.
    """
    # Treat an omitted `other_columns` like an empty list, so the primary column is always checked.
    candidates = [column] if other_columns is None else [column, *other_columns]

    # The null check is delegated to the `has_nulls()` method of each column's `_series`.
    offending_names = [candidate.name for candidate in candidates if candidate._series.has_nulls()]
    if not offending_names:
        return

    # `from None` suppresses exception chaining, so only this validation error is shown.
    raise MissingValuesError(_build_error_message(offending_names, operation)) from None


def _build_error_message(missing_values_names: list[str], operation: str) -> str:
    """Compose the error text naming the attempted operation and the columns that have missing values."""
    prefix = f"Tried to {operation} on columns with missing values"
    return f"{prefix} {missing_values_names}."
17 changes: 13 additions & 4 deletions src/safeds/_validation/_check_column_is_numeric_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,34 @@
def _check_column_is_numeric(
column: Column,
*,
other_columns: list[Column] | None = None,
operation: str = "do a numeric operation",
) -> None:
"""
Check whether the column is numeric, and raise an error if it is not.
Check whether a column is numeric, and raise an error if it is not.
Parameters
----------
column:
The column to check.
other_columns:
Other columns to check. This provides better error messages than checking each column individually.
operation:
The operation that is performed on the column. This is used in the error message.
Raises
------
ColumnTypeError
If the column is not numeric.
If a column is not numeric.
"""
if not column.type.is_numeric:
message = _build_error_message([column.name], operation)
if other_columns is None:
other_columns = []

columns = [column, *other_columns]
non_numeric_names = [col.name for col in columns if not col.type.is_numeric]

if non_numeric_names:
message = _build_error_message(non_numeric_names, operation)
raise ColumnTypeError(message) from None


Expand Down
8 changes: 4 additions & 4 deletions src/safeds/_validation/_check_columns_exist_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,15 @@ def _check_columns_exist(table_or_schema: Table | Schema, requested_names: str |


def _build_error_message(schema: Schema, unknown_names: list[str]) -> str:
message = "Could not find column(s):"
result = "Could not find column(s):"

for unknown_name in unknown_names:
similar_columns = _get_similar_column_names(schema, unknown_name)
message += f"\n - '{unknown_name}'"
result += f"\n - '{unknown_name}'"
if similar_columns:
message += f": Did you mean one of {similar_columns}?"
result += f": Did you mean one of {similar_columns}?"

return message
return result


def _get_similar_column_names(schema: Schema, name: str) -> list[str]:
Expand Down
47 changes: 47 additions & 0 deletions src/safeds/_validation/_check_indices_module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from safeds.exceptions import IndexOutOfBoundsError

if TYPE_CHECKING:
from collections.abc import Sequence


def _check_indices(
sequence: Sequence,
indices: int | list[int],
*,
allow_negative: bool = True,
) -> None:
"""
Check if indices are valid for the provided sequence.
Parameters
----------
sequence:
The sequence to check.
indices:
The indices to check.
allow_negative:
If negative indices are allowed.
Raises
------
IndexOutOfBoundsError:
If the index is out of bounds.
"""
if isinstance(indices, int):
indices = [indices]

min_legal = -len(sequence) if allow_negative else 0
max_legal = len(sequence) - 1

illegal_indices = [index for index in indices if not min_legal <= index <= max_legal]
if illegal_indices:
message = _build_error_message(illegal_indices, min_legal, max_legal)
raise IndexOutOfBoundsError(message) from None


def _build_error_message(illegal_indices: list[int], min_legal: int, max_legal: int) -> str:
    """Compose the error text naming the offending indices and the closed interval of legal indices."""
    legal_interval = f"[{min_legal}, {max_legal}]"
    return f"The indices {illegal_indices} are outside the legal interval {legal_interval}."
14 changes: 7 additions & 7 deletions src/safeds/_validation/_check_schema_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,15 +108,15 @@ def _build_error_message_for_additional_columns(additional_columns: list[str]) -


def _build_error_message_for_columns_in_wrong_order(expected: list[str], actual: list[str]) -> str:
message = "The columns are in the wrong order:\n"
message += f" Expected: {expected}\n"
message += f" Actual: {actual}"
return message
result = "The columns are in the wrong order:\n"
result += f" Expected: {expected}\n"
result += f" Actual: {actual}"
return result


def _build_error_message_for_column_types(mismatched_types: list[tuple[str, pl.DataType, pl.DataType]]) -> str:
message = "The following columns have the wrong type:"
result = "The following columns have the wrong type:"
for column_name, expected_type, actual_type in mismatched_types:
message += f"\n - '{column_name}': Expected '{expected_type}', but got '{actual_type}'."
result += f"\n - '{column_name}': Expected '{expected_type}', but got '{actual_type}'."

return message
return result
2 changes: 1 addition & 1 deletion src/safeds/data/image/containers/_empty_image_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def size_count(self) -> int:
return 0

def get_image(self, index: int) -> Image:
raise IndexOutOfBoundsError(index)
raise IndexOutOfBoundsError(f"There is no element at index '{index}'.")

def index(self, _image: Image) -> list[int]:
return []
Expand Down
4 changes: 2 additions & 2 deletions src/safeds/data/image/containers/_multi_size_image_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def size_count(self) -> int:

def get_image(self, index: int) -> Image:
if index not in self._indices_to_image_size_dict:
raise IndexOutOfBoundsError(index)
raise IndexOutOfBoundsError(f"There is no element at index '{index}'.")
return self._image_list_dict[self._indices_to_image_size_dict[index]].get_image(index)

def index(self, image: Image) -> list[int]:
Expand Down Expand Up @@ -282,7 +282,7 @@ def to_images(self, indices: list[int] | None = None) -> list[Image]:
if index not in self._indices_to_image_size_dict:
wrong_indices.append(index)
if len(wrong_indices) != 0:
raise IndexOutOfBoundsError(wrong_indices)
raise IndexOutOfBoundsError(f"There are no elements at indices {wrong_indices}.")
images = []
for index in indices:
images.append(self._image_list_dict[self._indices_to_image_size_dict[index]].get_image(index))
Expand Down
6 changes: 3 additions & 3 deletions src/safeds/data/image/containers/_single_size_image_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def _get_batch(self, batch_number: int, batch_size: int | None = None) -> Tensor
if batch_size is None:
batch_size = self._batch_size
if batch_size * batch_number >= len(self):
raise IndexOutOfBoundsError(batch_size * batch_number)
raise IndexOutOfBoundsError(f"There is no element at index '{batch_size * batch_number}'.")
max_index = batch_size * (batch_number + 1) if batch_size * (batch_number + 1) < len(self) else len(self)
return (
self._tensor[
Expand Down Expand Up @@ -311,7 +311,7 @@ def size_count(self) -> int:

def get_image(self, index: int) -> Image:
if index not in self._indices_to_tensor_positions:
raise IndexOutOfBoundsError(index)
raise IndexOutOfBoundsError(f"There is no element at index '{index}'.")
return Image(self._tensor[self._indices_to_tensor_positions[index]])

def index(self, image: Image) -> list[int]:
Expand Down Expand Up @@ -433,7 +433,7 @@ def to_images(self, indices: list[int] | None = None) -> list[Image]:
if index not in self._indices_to_tensor_positions:
wrong_indices.append(index)
if len(wrong_indices) != 0:
raise IndexOutOfBoundsError(wrong_indices)
raise IndexOutOfBoundsError(f"There are no elements at indices {wrong_indices}.")
return [Image(self._tensor[self._indices_to_tensor_positions[index]]) for index in indices]

def change_channel(self, channel: int) -> ImageList:
Expand Down
Loading

0 comments on commit 38dc89c

Please sign in to comment.