Skip to content

Commit

Permalink
feat: finalize Row class
Browse files Browse the repository at this point in the history
  • Loading branch information
lars-reimann committed Jan 13, 2025
1 parent 212c0ae commit faa7904
Show file tree
Hide file tree
Showing 107 changed files with 693 additions and 399 deletions.
2 changes: 1 addition & 1 deletion src/safeds/data/tabular/containers/_lazy_vectorized_row.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class _LazyVectorizedRow(Row):
up operations on the row.
Moreover, accessing a column only builds an expression that will be evaluated when needed. This is useful when later
operations remove more rows or columns, so we don't do unnecessary work upfront.
operations remove rows or columns, so we don't do unnecessary work upfront.
"""

# ------------------------------------------------------------------------------------------------------------------
Expand Down
35 changes: 18 additions & 17 deletions src/safeds/data/tabular/containers/_row.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,29 @@

from abc import ABC, abstractmethod
from collections.abc import Iterator, Mapping
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING

from ._cell import Cell

if TYPE_CHECKING:
from safeds.data.tabular.typing import ColumnType, Schema

from ._cell import Cell


class Row(ABC, Mapping[str, Any]):
class Row(ABC, Mapping[str, Cell]):
"""
A one-dimensional collection of named, heterogeneous values.
This class cannot be instantiated directly. It is only used for arguments of callbacks.
You only need to interact with this class in callbacks passed to higher-order functions.
"""

# ------------------------------------------------------------------------------------------------------------------
# Dunder methods
# ------------------------------------------------------------------------------------------------------------------

def __contains__(self, name: Any) -> bool:
return self.has_column(name)
def __contains__(self, key: object, /) -> bool:
if not isinstance(key, str):
return False
return self.has_column(key)

@abstractmethod
def __eq__(self, other: object) -> bool: ...
Expand All @@ -33,7 +35,7 @@ def __getitem__(self, name: str) -> Cell:
@abstractmethod
def __hash__(self) -> int: ...

def __iter__(self) -> Iterator[Any]:
def __iter__(self) -> Iterator[str]:
return iter(self.column_names)

def __len__(self) -> int:
Expand All @@ -48,18 +50,18 @@ def __sizeof__(self) -> int: ...

@property
@abstractmethod
def column_names(self) -> list[str]:
"""The names of the columns in the row."""
def column_count(self) -> int:
"""The number of columns."""

@property
@abstractmethod
def column_count(self) -> int:
"""The number of columns in the row."""
def column_names(self) -> list[str]:
"""The names of the columns."""

@property
@abstractmethod
def schema(self) -> Schema:
"""The schema of the row."""
"""The schema, which is a mapping from column names to their types."""

# ------------------------------------------------------------------------------------------------------------------
# Column operations
Expand Down Expand Up @@ -98,7 +100,6 @@ def get_cell(self, name: str) -> Cell:
| 2 | 4 |
+------+------+
>>> table.remove_rows(lambda row: row["col1"] == 1)
+------+------+
| col1 | col2 |
Expand All @@ -112,7 +113,7 @@ def get_cell(self, name: str) -> Cell:
@abstractmethod
def get_column_type(self, name: str) -> ColumnType:
"""
Get the type of the specified column.
Get the type of a column. Unlike the `[]` operator (indexed access), which returns the cell of a column, this returns the column's type.
Parameters
----------
Expand All @@ -127,13 +128,13 @@ def get_column_type(self, name: str) -> ColumnType:
Raises
------
ColumnNotFoundError
If the column name does not exist.
If the column does not exist.
"""

@abstractmethod
def has_column(self, name: str) -> bool:
"""
Check if the row has a column with the specified name.
Check if the row has a column with a specific name. This is equivalent to using the `in` operator.
Parameters
----------
Expand Down
24 changes: 12 additions & 12 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,41 +393,41 @@ def _data_frame(self) -> pl.DataFrame:
return self.__data_frame_cache

@property
def column_names(self) -> list[str]:
def column_count(self) -> int:
"""
The names of the columns in the table.
The number of columns.
**Note:** This operation must compute the schema of the table, which can be expensive.
Examples
--------
>>> from safeds.data.tabular.containers import Table
>>> table = Table({"a": [1, 2, 3], "b": [4, 5, 6]})
>>> table.column_names
['a', 'b']
>>> table.column_count
2
"""
return self.schema.column_names
return len(self.column_names)

@property
def column_count(self) -> int:
def column_names(self) -> list[str]:
"""
The number of columns in the table.
The names of the columns in the table.
**Note:** This operation must compute the schema of the table, which can be expensive.
Examples
--------
>>> from safeds.data.tabular.containers import Table
>>> table = Table({"a": [1, 2, 3], "b": [4, 5, 6]})
>>> table.column_count
2
>>> table.column_names
['a', 'b']
"""
return len(self.column_names)
return self.schema.column_names

@property
def row_count(self) -> int:
"""
The number of rows in the table.
The number of rows.
**Note:** This operation must fully load the data into memory, which can be expensive.
Expand Down Expand Up @@ -458,7 +458,7 @@ def plot(self) -> TablePlotter:
@property
def schema(self) -> Schema:
"""
The schema of the table.
The schema, which is a mapping from column names to their types.
Examples
--------
Expand Down
10 changes: 6 additions & 4 deletions src/safeds/data/tabular/typing/_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import sys
from collections.abc import Iterator, Mapping
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING

from safeds._utils import _structural_hash
from safeds._validation import _check_columns_exist
Expand Down Expand Up @@ -39,8 +39,10 @@ def __init__(self, schema: Mapping[str, ColumnType]) -> None:
check_dtypes=False,
)

def __contains__(self, name: Any) -> bool:
return self.has_column(name)
def __contains__(self, key: object, /) -> bool:
if not isinstance(key, str):
return False
return self.has_column(key)

def __eq__(self, other: object) -> bool:
if not isinstance(other, Schema):
Expand Down Expand Up @@ -151,7 +153,7 @@ def get_column_type(self, name: str) -> ColumnType:

def has_column(self, name: str) -> bool:
"""
Check if the schema has a column with a specific name. This is equivalent to using the `in` operator.
Check if the schema has a column with a specific name. This is equivalent to using the `in` operator.
Parameters
----------
Expand Down
65 changes: 46 additions & 19 deletions tests/helpers/_assertions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from polars.testing import assert_frame_equal

from safeds.data.labeled.containers import TabularDataset
from safeds.data.tabular.containers import Cell, Column, Table
from safeds.data.tabular.containers import Cell, Column, Row, Table


def assert_tables_are_equal(
Expand Down Expand Up @@ -62,44 +62,71 @@ def assert_that_tabular_datasets_are_equal(table1: TabularDataset, table2: Tabul


def assert_cell_operation_works(
input_value: Any,
value: Any,
transformer: Callable[[Cell], Cell],
expected_value: Any,
expected: Any,
) -> None:
"""
Assert that a cell operation works as expected.
Parameters
----------
input_value:
value:
The value in the input cell.
transformer:
The transformer to apply to the cells.
expected_value:
expected:
The expected value of the transformed cell.
"""
column = Column("A", [input_value])
column = Column("A", [value])
transformed_column = column.transform(transformer)
assert transformed_column == Column("A", [expected_value]), f"Expected: {expected_value}\nGot: {transformed_column}"
actual = transformed_column[0]
assert actual == expected


def assert_row_operation_works(
input_value: Any,
transformer: Callable[[Table], Table],
expected_value: Any,
table: Table,
computer: Callable[[Row], Cell],
expected: list[Any],
) -> None:
"""
Assert that a row operation works as expected.
Parameters
----------
input_value:
The value in the input row.
transformer:
The transformer to apply to the rows.
expected_value:
The expected value of the transformed row.
table:
The input table.
computer:
The function that computes the new column.
expected:
The expected values of the computed column.
"""
column_name = _find_free_column_name(table, "computed")

new_table = table.add_computed_column(column_name, computer)
actual = list(new_table.get_column(column_name))
assert actual == expected


def _find_free_column_name(table: Table, prefix: str) -> str:
"""
table = Table(input_value)
transformed_table = transformer(table)
assert transformed_table == Table(expected_value), f"Expected: {expected_value}\nGot: {transformed_table}"
Find a free column name in the table.
Parameters
----------
table:
The table to search for a free column name.
prefix:
The prefix to use for the column name.
Returns
-------
free_name:
A free column name.
"""
column_name = prefix

while column_name in table.column_names:
column_name += "_"

return column_name
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# serializer version: 1
# name: TestContract.test_should_return_same_hash_in_different_processes[empty]
1789859531466043636
# ---
# name: TestContract.test_should_return_same_hash_in_different_processes[no rows]
585695607399955642
# ---
# name: TestContract.test_should_return_same_hash_in_different_processes[with data]
909875695937937648
# ---
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@
("table", "expected"),
[
(Table({}), 0),
(Table({"A": [1, 2, 3]}), 1),
(Table({"col1": []}), 1),
(Table({"col1": [1], "col2": [1]}), 2),
],
ids=[
"empty",
"non-empty",
"no rows",
"with data",
],
)
def test_should_return_the_number_of_columns(table: Table, expected: int) -> None:
row = _LazyVectorizedRow(table=table)
def test_should_return_number_of_columns(table: Table, expected: int) -> None:
row = _LazyVectorizedRow(table)
assert row.column_count == expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import pytest

from safeds.data.tabular.containers import Table
from safeds.data.tabular.containers._lazy_vectorized_row import _LazyVectorizedRow


@pytest.mark.parametrize(
    ("table", "expected"),
    [
        pytest.param(Table({}), [], id="empty"),
        pytest.param(Table({"col1": []}), ["col1"], id="no rows"),
        pytest.param(Table({"col1": [1], "col2": [1]}), ["col1", "col2"], id="with data"),
    ],
)
def test_should_return_column_names(table: Table, expected: list[str]) -> None:
    """Check that a row built from `table` reports the expected list of column names."""
    actual = _LazyVectorizedRow(table).column_names
    assert actual == expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import pytest

from safeds.data.tabular.containers import Table
from safeds.data.tabular.containers._lazy_vectorized_row import _LazyVectorizedRow


@pytest.mark.parametrize(
    ("table", "column", "expected"),
    [
        (Table({}), "C", False),
        (Table({"A": []}), "A", True),
        (Table({"A": []}), "B", False),
        (Table({"A": []}), 1, False),
    ],
    ids=[
        "empty",
        "has column",
        "doesn't have column",
        "key is not string",
    ],
)
def test_should_return_if_column_is_in_row(table: Table, column: object, expected: bool) -> None:
    """
    Check that the `in` operator on a row reports whether a column exists.

    Parameters
    ----------
    table:
        The table the row is created from.
    column:
        The key to look up. Deliberately typed as `object`, since the "key is not string" case passes a
        non-string key to exercise the `isinstance` guard in `Row.__contains__`.
    expected:
        The expected result of `column in row`.
    """
    row = _LazyVectorizedRow(table)
    assert (column in row) == expected
Loading

0 comments on commit faa7904

Please sign in to comment.