Skip to content

Commit 8e781f9

Browse files
authored
feat: major API redesign (WIP) (#752)
Closes #694
Closes #699
Closes #714
Closes #748

### Summary of Changes

* Replace old implementation of tabular containers
* New, more efficient implementation of metrics
* Standalone package for metrics
* New regression metrics
* Abstract base class for classifiers & regressors
* Introspection methods to get information about features and target of supervised models
* Rename `LogisticRegressionClassifier` to `LogisticClassifier` (shorter + does not show up when searching for regression)
* Rename `LinearRegressionRegressor` to `LinearRegressor` (shorter)
* Rename `SupportVectorMachineClassifier` to `SupportVectorClassifier` (a little less precise, but still unambiguous and shorter)
* Rename `SupportVectorMachineRegressor` to `SupportVectorRegressor` (ditto)
1 parent 0e5a54b commit 8e781f9

File tree

163 files changed

+7217
-15007
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

163 files changed

+7217
-15007
lines changed

benchmarks/metrics/__init__.py

Whitespace-only changes.

benchmarks/metrics/classification.py

Lines changed: 71 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,71 @@
1+
from __future__ import annotations
2+
3+
from timeit import timeit
4+
from typing import TYPE_CHECKING
5+
6+
import polars as pl
7+
8+
from benchmarks.table.utils import create_synthetic_table
9+
from safeds.data.tabular.containers import Table
10+
from safeds.ml.metrics import ClassificationMetrics
11+
12+
13+
REPETITIONS = 10
14+
15+
16+
def _run_accuracy() -> None:
17+
ClassificationMetrics.accuracy(table.get_column("predicted"), table.get_column("expected"))
18+
19+
20+
def _run_f1_score() -> None:
21+
ClassificationMetrics.f1_score(table.get_column("predicted"), table.get_column("expected"), 1)
22+
23+
24+
def _run_precision() -> None:
25+
ClassificationMetrics.precision(table.get_column("predicted"), table.get_column("expected"), 1)
26+
27+
28+
def _run_recall() -> None:
29+
ClassificationMetrics.recall(table.get_column("predicted"), table.get_column("expected"), 1)
30+
31+
32+
if __name__ == "__main__":
33+
# Create a synthetic Table
34+
table = (
35+
create_synthetic_table(10000, 2)
36+
.rename_column("column_0", "predicted")
37+
.rename_column("column_1", "expected")
38+
)
39+
40+
# Run the benchmarks
41+
timings: dict[str, float] = {
42+
"accuracy": timeit(
43+
_run_accuracy,
44+
number=REPETITIONS,
45+
),
46+
"f1_score": timeit(
47+
_run_f1_score,
48+
number=REPETITIONS,
49+
),
50+
"precision": timeit(
51+
_run_precision,
52+
number=REPETITIONS,
53+
),
54+
"recall": timeit(
55+
_run_recall,
56+
number=REPETITIONS,
57+
),
58+
}
59+
60+
# Print the timings
61+
with pl.Config(
62+
tbl_rows=-1,
63+
):
64+
print(
65+
Table(
66+
{
67+
"method": list(timings.keys()),
68+
"timing": list(timings.values()),
69+
}
70+
)
71+
)

benchmarks/table/column_operations_polars.py renamed to benchmarks/table/column_operations.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
from timeit import timeit
22

3-
from safeds.data.tabular.containers import ExperimentalTable
3+
from safeds.data.tabular.containers import Table
44

5-
from benchmarks.table.utils import create_synthetic_table_polars
5+
from benchmarks.table.utils import create_synthetic_table
66

77
REPETITIONS = 10
88

@@ -21,7 +21,7 @@ def _run_summarize_statistics() -> None:
2121

2222
if __name__ == "__main__":
2323
# Create a synthetic Table
24-
table = create_synthetic_table_polars(100, 5000)
24+
table = create_synthetic_table(100, 5000)
2525

2626
# Run the benchmarks
2727
timings: dict[str, float] = {
@@ -41,7 +41,7 @@ def _run_summarize_statistics() -> None:
4141

4242
# Print the timings
4343
print(
44-
ExperimentalTable(
44+
Table(
4545
{
4646
"method": list(timings.keys()),
4747
"timing": list(timings.values()),

benchmarks/table/row_operations.py

Lines changed: 38 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -1,54 +1,58 @@
11
from timeit import timeit
22

3+
import polars as pl
4+
35
from safeds.data.tabular.containers import Table
46

57
from benchmarks.table.utils import create_synthetic_table
68

79
REPETITIONS = 10
810

911

10-
def _run_group_rows() -> None:
11-
table.group_rows(lambda row: row.get_value("column_0") % 2 == 0)
12-
13-
1412
def _run_remove_duplicate_rows() -> None:
15-
table.remove_duplicate_rows()
13+
table.remove_duplicate_rows()._lazy_frame.collect()
1614

1715

1816
def _run_remove_rows_with_missing_values() -> None:
19-
table.remove_rows_with_missing_values()
17+
table.remove_rows_with_missing_values()._lazy_frame.collect()
2018

2119

2220
def _run_remove_rows_with_outliers() -> None:
2321
table.remove_rows_with_outliers()
2422

2523

2624
def _run_remove_rows() -> None:
27-
table.remove_rows(lambda row: row.get_value("column_0") % 2 == 0)
25+
table.remove_rows(lambda row: row.get_value("column_0") % 2 == 0)._lazy_frame.collect()
26+
27+
28+
def _run_remove_rows_by_column() -> None:
29+
table.remove_rows_by_column("column_0", lambda cell: cell % 2 == 0)._lazy_frame.collect()
2830

2931

3032
def _run_shuffle_rows() -> None:
31-
table.shuffle_rows()
33+
table.shuffle_rows()._lazy_frame.collect()
3234

3335

3436
def _run_slice_rows() -> None:
35-
table.slice_rows(end=table.number_of_rows // 2)
37+
table.slice_rows(length=table.number_of_rows // 2)._lazy_frame.collect()
3638

3739

3840
def _run_sort_rows() -> None:
39-
table.sort_rows(lambda row1, row2: row1.get_value("column_0") - row2.get_value("column_0"))
41+
table.sort_rows(lambda row: row.get_value("column_0"))._lazy_frame.collect()
4042

4143

42-
def _run_split_rows() -> None:
43-
table.split_rows(0.5)
44+
def _run_sort_rows_by_column() -> None:
45+
table.sort_rows_by_column("column_0")._lazy_frame.collect()
4446

4547

46-
def _run_to_rows() -> None:
47-
table.to_rows()
48+
def _run_split_rows() -> None:
49+
table_1, table_2 = table.split_rows(0.5)
50+
table_1._lazy_frame.collect()
51+
table_2._lazy_frame.collect()
4852

4953

5054
def _run_transform_column() -> None:
51-
table.transform_column("column_0", lambda row: row.get_value("column_0") * 2)
55+
table.transform_column("column_0", lambda value: value * 2)._lazy_frame.collect()
5256

5357

5458
if __name__ == "__main__":
@@ -57,10 +61,6 @@ def _run_transform_column() -> None:
5761

5862
# Run the benchmarks
5963
timings: dict[str, float] = {
60-
"group_rows": timeit(
61-
_run_group_rows,
62-
number=REPETITIONS,
63-
),
6464
"remove_duplicate_rows": timeit(
6565
_run_remove_duplicate_rows,
6666
number=REPETITIONS,
@@ -77,6 +77,10 @@ def _run_transform_column() -> None:
7777
_run_remove_rows,
7878
number=REPETITIONS,
7979
),
80+
"remove_rows_by_column": timeit(
81+
_run_remove_rows_by_column,
82+
number=REPETITIONS,
83+
),
8084
"shuffle_rows": timeit(
8185
_run_shuffle_rows,
8286
number=REPETITIONS,
@@ -89,26 +93,29 @@ def _run_transform_column() -> None:
8993
_run_sort_rows,
9094
number=REPETITIONS,
9195
),
92-
"split_rows": timeit(
93-
_run_split_rows,
96+
"sort_rows_by_column": timeit(
97+
_run_sort_rows_by_column,
9498
number=REPETITIONS,
9599
),
96-
"to_rows": timeit(
97-
_run_to_rows,
100+
"split_rows": timeit(
101+
_run_split_rows,
98102
number=REPETITIONS,
99103
),
100-
"transform_colum": timeit(
104+
"transform_column": timeit(
101105
_run_transform_column,
102106
number=REPETITIONS,
103107
),
104108
}
105109

106110
# Print the timings
107-
print(
108-
Table(
109-
{ # noqa: T201
110-
"method": list(timings.keys()),
111-
"timing": list(timings.values()),
112-
}
111+
with pl.Config(
112+
tbl_rows=-1,
113+
):
114+
print(
115+
Table(
116+
{
117+
"method": list(timings.keys()),
118+
"timing": list(timings.values()),
119+
}
120+
)
113121
)
114-
)

benchmarks/table/row_operations_polars.py

Lines changed: 0 additions & 121 deletions
This file was deleted.

benchmarks/table/utils/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,5 @@
11
from .create_synthetic_table import create_synthetic_table
2-
from .create_synthetic_table_polars import create_synthetic_table_polars
32

43
__all__ = [
54
"create_synthetic_table",
6-
"create_synthetic_table_polars",
75
]

0 commit comments

Comments
 (0)