diff --git a/src/safeds/data/labeled/containers/__init__.py b/src/safeds/data/labeled/containers/__init__.py
index 402c635b6..e6237ec24 100644
--- a/src/safeds/data/labeled/containers/__init__.py
+++ b/src/safeds/data/labeled/containers/__init__.py
@@ -7,16 +7,19 @@
if TYPE_CHECKING:
from ._image_dataset import ImageDataset
from ._tabular_dataset import TabularDataset
+ from ._time_series_dataset import TimeSeriesDataset
apipkg.initpkg(
__name__,
{
"ImageDataset": "._image_dataset:ImageDataset",
"TabularDataset": "._tabular_dataset:TabularDataset",
+ "TimeSeriesDataset": "._time_series_dataset:TimeSeriesDataset",
},
)
__all__ = [
"ImageDataset",
"TabularDataset",
+ "TimeSeriesDataset",
]
diff --git a/src/safeds/data/labeled/containers/_time_series_dataset.py b/src/safeds/data/labeled/containers/_time_series_dataset.py
new file mode 100644
index 000000000..33d941541
--- /dev/null
+++ b/src/safeds/data/labeled/containers/_time_series_dataset.py
@@ -0,0 +1,328 @@
+from __future__ import annotations
+
+import sys
+from typing import TYPE_CHECKING
+
+from safeds._utils import _structural_hash
+from safeds.data.tabular.containers import Column, Table
+
+if TYPE_CHECKING:
+ from collections.abc import Mapping, Sequence
+ from typing import Any
+
+ import torch
+ from torch.utils.data import DataLoader, Dataset
+
+
+class TimeSeriesDataset:
+ """
+ A time series dataset maps feature and time columns to a target column. Unlike a `TabularDataset`, a time series
+ dataset must contain one target column and one time column, but may have an empty set of feature columns.
+
+ Create a time series dataset from a mapping of column names to their values.
+
+ Parameters
+ ----------
+ data:
+ The data.
+ target_name:
+ Name of the target column.
+ time_name:
+ Name of the time column.
+ extra_names:
+ Names of the columns that are neither features nor target. If None, no extra columns are used, i.e. all
+ columns except the target and time columns are used as features.
+
+ Raises
+ ------
+ ColumnLengthMismatchError
+ If columns have different lengths.
+ ValueError
+ If the target column is also an extra column.
+ ValueError
+ If the time column is also an extra column.
+
+ Examples
+ --------
+ >>> from safeds.data.labeled.containers import TimeSeriesDataset
+ >>> dataset = TimeSeriesDataset(
+ ...     {"id": [1, 2, 3], "feature": [4, 5, 6], "target": [1, 2, 3], "error": [0, 0, 1]},
+ ...     target_name="target",
+ ...     time_name="id",
+ ... extra_names=["error"]
+ ... )
+ """
+
+ # ------------------------------------------------------------------------------------------------------------------
+ # Dunder methods
+ # ------------------------------------------------------------------------------------------------------------------
+ def __init__(
+ self,
+ data: Table | Mapping[str, Sequence[Any]],
+ target_name: str,
+ time_name: str,
+ extra_names: list[str] | None = None,
+ ):
+ # Preprocess inputs
+ if not isinstance(data, Table):
+ data = Table(data)
+ if extra_names is None:
+ extra_names = []
+
+ # Derive feature names
+ feature_names = [name for name in data.column_names if name not in {target_name, *extra_names, time_name}]
+
+ # Validate inputs
+ if time_name in extra_names:
+ raise ValueError(f"Column '{time_name}' cannot be both time and extra.")
+ if target_name in extra_names:
+ raise ValueError(f"Column '{target_name}' cannot be both target and extra.")
+ if len(feature_names) == 0:
+ feature_names = []
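+ # Unlike a TabularDataset, an empty set of feature columns is allowed, so no error is raised when none remain.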
+
+ # Set attributes
+ self._table: Table = data
+ self._features: Table = data.keep_only_columns(feature_names)
+ self._target: Column = data.get_column(target_name)
+ self._time: Column = data.get_column(time_name)
+ self._extras: Table = data.keep_only_columns(extra_names)
+
+ def __eq__(self, other: object) -> bool:
+ """
+ Compare two time series datasets.
+
+ Returns
+ -------
+ equals:
+ 'True' if features, time, target and extras are equal, 'False' otherwise.
+ """
+ if not isinstance(other, TimeSeriesDataset):
+ return NotImplemented
+ return (self is other) or (
+ self.target == other.target
+ and self.features == other.features
+ and self.extras == other.extras
+ and self.time == other.time
+ )
+
+ def __hash__(self) -> int:
+ """
+ Return a deterministic hash value for this time series dataset.
+
+ Returns
+ -------
+ hash:
+ The hash value.
+ """
+ return _structural_hash(self.target, self.features, self.extras, self.time)
+
+ def __sizeof__(self) -> int:
+ """
+ Return the complete size of this object.
+
+ Returns
+ -------
+ size:
+ Size of this object in bytes.
+ """
+ return (
+ sys.getsizeof(self._target)
+ + sys.getsizeof(self._features)
+ + sys.getsizeof(self.extras)
+ + sys.getsizeof(self._time)
+ )
+
+ # ------------------------------------------------------------------------------------------------------------------
+ # Properties
+ # ------------------------------------------------------------------------------------------------------------------
+
+ @property
+ def features(self) -> Table:
+ """The feature columns of the time series dataset."""
+ return self._features
+
+ @property
+ def target(self) -> Column:
+ """The target column of the time series dataset."""
+ return self._target
+
+ @property
+ def time(self) -> Column:
+ """The time column of the time series dataset."""
+ return self._time
+
+ @property
+ def extras(self) -> Table:
+ """
+ Additional columns of the time series dataset that are neither features, target nor time.
+
+ These can be used to store additional information about instances, such as IDs.
+ """
+ return self._extras
+
+ # ------------------------------------------------------------------------------------------------------------------
+ # Conversion
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def to_table(self) -> Table:
+ """
+ Return a new `Table` containing the feature columns, the target column, the time column and the extra columns.
+
+ The original `TimeSeriesDataset` is not modified.
+
+ Returns
+ -------
+ table:
+ A table containing the feature columns, the target column, the time column and the extra columns.
+ """
+ return self._table
+
+ def _into_dataloader_with_window(self, window_size: int, forecast_horizon: int, batch_size: int) -> DataLoader:
+ """
+ Return a `DataLoader` for the data stored in this time series dataset, used for training neural networks.
+
+ It splits the target column into windows, uses them as features, and creates the targets from the values
+ offset by the forecast horizon. The original time series dataset is not modified.
+
+ Parameters
+ ----------
+ window_size:
+ The size of the created windows.
+ forecast_horizon:
+ The length of the forecast horizon, i.e. the gap between a window and the value used as its target.
+ batch_size:
+ The size of data batches that should be loaded at one time.
+
+ Raises
+ ------
+ ValueError
+ If the number of rows is smaller than or equal to `window_size + forecast_horizon`.
+
+ Returns
+ -------
+ result:
+ The DataLoader.
+ """
+ import torch
+ from torch.utils.data import DataLoader
+
+ target_tensor = torch.tensor(self.target._data.values, dtype=torch.float32)
+
+ x_s = []
+ y_s = []
+
+ size = target_tensor.size(0)
+ if window_size < 1:
+ raise ValueError("window_size must be greater than or equal to 1")
+ if forecast_horizon < 1:
+ raise ValueError("forecast_horizon must be greater than or equal to 1")
+ if size <= forecast_horizon + window_size:
+ raise ValueError("Can not create windows with window size less than forecast horizon + window_size")
+ # Create windows over the target column and, for each window, a target value lagged by the forecast horizon.
+ # Every feature column is windowed in the same way and concatenated to the target window.
+ # -> x_s[i] has length window_size * (1 + number of feature columns), y_s[i] is a single value
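+ # Example (no feature columns): window_size=2, forecast_horizon=1 and a target column [1, 2, 3, 4, 5, 6]
+ # yield x_s = [[1, 2], [2, 3], [3, 4]] and y_s = [4, 5, 6].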
+ feature_cols = self.features.to_columns()
+ for i in range(size - (forecast_horizon + window_size)):
+ window = target_tensor[i : i + window_size]
+ label = target_tensor[i + window_size + forecast_horizon]
+ for col in feature_cols:
+ data = torch.tensor(col._data.values, dtype=torch.float32)
+ window = torch.cat((window, data[i : i + window_size]), dim=0)
+ x_s.append(window)
+ y_s.append(label)
+ x_s_tensor = torch.stack(x_s)
+ y_s_tensor = torch.stack(y_s)
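+ # x_s_tensor has shape (num_windows, window_size * (1 + number of feature columns)); y_s_tensor has shape (num_windows,)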
+ dataset = _create_dataset(x_s_tensor, y_s_tensor)
+ return DataLoader(dataset=dataset, batch_size=batch_size)
+
+ def _into_dataloader_with_window_predict(
+ self,
+ window_size: int,
+ forecast_horizon: int,
+ batch_size: int,
+ ) -> DataLoader:
+ """
+ Return a `DataLoader` for the data stored in this time series dataset, used for prediction with neural networks.
+
+ It splits the target column into windows and uses them as features; no target values are created, since this
+ data loader is meant for prediction. The original time series dataset is not modified.
+
+ Parameters
+ ----------
+ window_size:
+ The size of the created windows.
+ forecast_horizon:
+ The length of the forecast horizon; together with `window_size` it determines how many windows are created.
+ batch_size:
+ The size of data batches that should be loaded at one time.
+
+ Returns
+ -------
+ result:
+ The DataLoader.
+ """
+ import torch
+ from torch.utils.data import DataLoader
+
+ target_tensor = torch.tensor(self.target._data.values, dtype=torch.float32)
+ x_s = []
+
+ size = target_tensor.size(0)
+ feature_cols = self.features.to_columns()
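+ # Build only the feature windows; no labels are created, since this data loader is used for prediction.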
+ for i in range(size - (forecast_horizon + window_size)):
+ window = target_tensor[i : i + window_size]
+ for col in feature_cols:
+ data = torch.tensor(col._data.values, dtype=torch.float32)
+ window = torch.cat((window, data[i : i + window_size]), dim=-1)
+ x_s.append(window)
+
+ x_s_tensor = torch.stack(x_s)
+
+ dataset = _create_dataset_predict(x_s_tensor)
+ return DataLoader(dataset=dataset, batch_size=batch_size)
+
+ # ------------------------------------------------------------------------------------------------------------------
+ # IPython integration
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def _repr_html_(self) -> str:
+ """
+ Return an HTML representation of the time series dataset.
+
+ Returns
+ -------
+ output:
+ The generated HTML.
+ """
+ return self._table._repr_html_()
+
+
+def _create_dataset(features: torch.Tensor, target: torch.Tensor) -> Dataset:
+ from torch.utils.data import Dataset
+
+ class _CustomDataset(Dataset):
+ def __init__(self, features_dataset: torch.Tensor, target_dataset: torch.Tensor):
+ self.X = features_dataset
+ self.Y = target_dataset.unsqueeze(-1)
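+ # The targets are reshaped to (n, 1) so that each window is paired with a single-value label.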
+ self.len = self.X.shape[0]
+
+ def __getitem__(self, item: int) -> tuple[torch.Tensor, torch.Tensor]:
+ return self.X[item], self.Y[item]
+
+ def __len__(self) -> int:
+ return self.len
+
+ return _CustomDataset(features, target)
+
+
+def _create_dataset_predict(features: torch.Tensor) -> Dataset:
+ from torch.utils.data import Dataset
+
+ class _CustomDataset(Dataset):
+ def __init__(self, features: torch.Tensor):
+ self.X = features
+ self.len = self.X.shape[0]
+
+ def __getitem__(self, item: int) -> torch.Tensor:
+ return self.X[item]
+
+ def __len__(self) -> int:
+ return self.len
+
+ return _CustomDataset(features)
diff --git a/src/safeds/data/tabular/containers/__init__.py b/src/safeds/data/tabular/containers/__init__.py
index 4d2a37901..66777d097 100644
--- a/src/safeds/data/tabular/containers/__init__.py
+++ b/src/safeds/data/tabular/containers/__init__.py
@@ -12,7 +12,6 @@
from ._experimental_polars_table import ExperimentalPolarsTable
from ._row import Row
from ._table import Table
- from ._time_series import TimeSeries
apipkg.initpkg(
__name__,
@@ -24,7 +23,6 @@
"ExperimentalPolarsTable": "._experimental_polars_table:ExperimentalPolarsTable",
"Row": "._row:Row",
"Table": "._table:Table",
- "TimeSeries": "._time_series:TimeSeries",
},
)
@@ -36,5 +34,4 @@
"ExperimentalPolarsTable",
"Row",
"Table",
- "TimeSeries",
]
diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py
index 7c242a0dc..fdeaf5b58 100644
--- a/src/safeds/data/tabular/containers/_column.py
+++ b/src/safeds/data/tabular/containers/_column.py
@@ -23,7 +23,6 @@
from safeds.data.tabular.containers import Table
-
T = TypeVar("T")
R = TypeVar("R")
@@ -1032,9 +1031,103 @@ def plot_histogram(self, *, number_of_bins: int = 10) -> Image:
>>> histogram = column.plot_histogram()
"""
from safeds.data.tabular.containers import Table
-
+
return Table({self._name: self._data}).plot_histograms(number_of_bins=number_of_bins)
+
+ def plot_compare_columns(self, column_list: list[Column]) -> Image:
+ """
+ Create a plot comparing the numerical values of columns, using their index as the x-axis.
+
+ Parameters
+ ----------
+ column_list:
+ A list of columns to be plotted.
+
+ Returns
+ -------
+ plot:
+ A plot with all the columns plotted against their index on the x-axis.
+
+ Raises
+ ------
+ NonNumericColumnError
+ If a column contains non-numerical values.
+ ValueError
+ If the columns do not have the same size.
+
+ Examples
+ --------
+ >>> from safeds.data.tabular.containers import Column
+ >>> col1 = Column("target", [4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+ >>> col2 = Column("target", [42, 51, 63, 71, 83, 91, 10, 11, 12, 13])
+ >>> image = col1.plot_compare_columns([col2])
+ """
+ import matplotlib.pyplot as plt
+ import pandas as pd
+ import seaborn as sns
+
+ data = pd.DataFrame()
+ column_list = [*column_list, self]
+ size = len(column_list[0])
+ data["INDEX"] = pd.DataFrame({"INDEX": range(size)})
+ for index, col in enumerate(column_list):
+ if not col.type.is_numeric():
+ raise NonNumericColumnError("The plotted columns contain non-numerical values.")
+ if len(col) != size:
+ raise ValueError("The columns must have the same size.")
+ data[col.name + " " + str(index)] = col._data
+
+ fig = plt.figure()
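+ # Reshape to long format (INDEX, variable, value) so seaborn can draw one line per column via `hue`.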
+ data = pd.melt(data, ["INDEX"])
+ sns.lineplot(x="INDEX", y="value", hue="variable", data=data)
+ plt.title("Multiple Series Plot")
+ plt.xlabel("Time")
+
+ plt.tight_layout()
+ buffer = io.BytesIO()
+ fig.savefig(buffer, format="png")
+ plt.close() # Prevents the figure from being displayed directly
+ buffer.seek(0)
+ return Image.from_bytes(buffer.read())
+
+ def plot_lagplot(self, lag: int) -> Image:
+ """
+ Plot a lagplot for the given column.
+
+ Parameters
+ ----------
+ lag:
+ The amount of lag used for the plot.
+
+ Returns
+ -------
+ plot:
+ The plot as an image.
+
+ Raises
+ ------
+ NonNumericColumnError
+ If the column contains non-numerical values.
+
+ Examples
+ --------
+ >>> from safeds.data.tabular.containers import Column
+ >>> column = Column("values", [1, 2, 3, 4, 3, 2])
+ >>> image = column.plot_lagplot(2)
+ """
+ import matplotlib.pyplot as plt
+ import pandas as pd
+
+ if not self.type.is_numeric():
+ raise NonNumericColumnError("This column contains non-numerical values.")
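+ # pandas' lag_plot scatters each value y(t) against y(t + lag).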
+ ax = pd.plotting.lag_plot(self._data, lag=lag)
+ fig = ax.figure
+ buffer = io.BytesIO()
+ fig.savefig(buffer, format="png")
+ plt.close() # Prevents the figure from being displayed directly
+ buffer.seek(0)
+ return Image.from_bytes(buffer.read())
+
# ------------------------------------------------------------------------------------------------------------------
# Conversion
# ------------------------------------------------------------------------------------------------------------------
diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py
index bfad88bfe..c62ba7c9b 100644
--- a/src/safeds/data/tabular/containers/_table.py
+++ b/src/safeds/data/tabular/containers/_table.py
@@ -30,11 +30,9 @@
import pandas as pd
from torch.utils.data import DataLoader, Dataset
- from safeds.data.labeled.containers import TabularDataset
+ from safeds.data.labeled.containers import TabularDataset, TimeSeriesDataset
from safeds.data.tabular.transformation import InvertibleTableTransformer, TableTransformer
- from ._time_series import TimeSeries
-
# noinspection PyProtectedMember
class Table:
@@ -1859,43 +1857,6 @@ def split_rows(self, percentage_in_first: float) -> tuple[Table, Table]:
self.slice_rows(round(percentage_in_first * self.number_of_rows)),
)
- def time_columns(self, target_name: str, time_name: str, feature_names: list[str] | None = None) -> TimeSeries:
- """
- Return a new `TimeSeries` with columns marked as a target and time column or feature columns.
-
- The original table is not modified.
-
- Parameters
- ----------
- target_name:
- Name of the target column.
- time_name:
- Name of the time column.
- feature_names:
- Names of the feature columns. If None, all columns except the target and time columns are used.
-
- Returns
- -------
- time_series:
- A new time series with the given target, time and feature names.
-
- Raises
- ------
- ValueError
- If the target column is also a feature column.
- ValueError
- If there is no other column than the specified target and time columns left to be a feature column
-
- Examples
- --------
- >>> from safeds.data.tabular.containers import Table, TimeSeries
- >>> table = Table.from_dict({"time": ["01.01", "01.02", "01.03"], "price": [1.10, 1.19, 1.79], "amount_bought": [74, 72, 51]})
- >>> tabular_dataset = table.time_columns(target_name="amount_bought",time_name = "time", feature_names=["price"])
- """
- from ._time_series import TimeSeries
-
- return TimeSeries._from_table(self, target_name, time_name, feature_names)
-
def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Table:
"""
Return a new `Table` with the provided column transformed by calling the provided transformer.
@@ -2304,7 +2265,7 @@ def plot_histograms(self, *, number_of_bins: int = 10) -> Image:
bars = np.array([])
for i in range(len(hist)):
- bars = np.append(bars, f"{round(bin_edges[i], 2)}-{round(bin_edges[i+1], 2)}")
+ bars = np.append(bars, f"{round(bin_edges[i], 2)}-{round(bin_edges[i + 1], 2)}")
ax.bar(bars, hist, edgecolor="black")
ax.set_xticks(np.arange(len(hist)), bars, rotation=45, horizontalalignment="right")
@@ -2564,6 +2525,49 @@ def to_tabular_dataset(self, target_name: str, extra_names: list[str] | None = N
return TabularDataset(self, target_name, extra_names)
+
+ def to_time_series_dataset(
+ self,
+ target_name: str,
+ time_name: str,
+ extra_names: list[str] | None = None,
+ ) -> TimeSeriesDataset:
+ """
+ Return a new `TimeSeriesDataset` with columns marked as a target column, a time column, or feature columns.
+
+ The original table is not modified.
+
+ Parameters
+ ----------
+ target_name:
+ Name of the target column.
+ time_name:
+ Name of the time column.
+ extra_names:
+ Names of the columns that are neither features nor target. If None, no extra columns are used, i.e. all
+ columns except the target and time columns are used as features.
+
+ Returns
+ -------
+ dataset:
+ A new time series dataset with the given target, time and extra names.
+
+ Raises
+ ------
+ ValueError
+ If the target column is also an extra column.
+ ValueError
+ If the time column is also an extra column.
+
+ Examples
+ --------
+ >>> from safeds.data.tabular.containers import Table
+ >>> table = Table({"day": [0, 1, 2], "price": [1.10, 1.19, 1.79], "amount_bought": [74, 72, 51]})
+ >>> dataset = table.to_time_series_dataset(target_name="amount_bought", time_name="day")
+ """
+ from safeds.data.labeled.containers import TimeSeriesDataset
+
+ return TimeSeriesDataset(self, target_name, time_name, extra_names)
+
# ------------------------------------------------------------------------------------------------------------------
# IPython integration
# ------------------------------------------------------------------------------------------------------------------
diff --git a/src/safeds/data/tabular/containers/_time_series.py b/src/safeds/data/tabular/containers/_time_series.py
deleted file mode 100644
index bf71e92e6..000000000
--- a/src/safeds/data/tabular/containers/_time_series.py
+++ /dev/null
@@ -1,1236 +0,0 @@
-from __future__ import annotations
-
-import io
-import sys
-from typing import TYPE_CHECKING
-
-from safeds._utils import _structural_hash
-from safeds.data.image.containers import Image
-from safeds.data.tabular.containers import Column, Row, Table
-from safeds.exceptions import (
- ColumnIsTargetError,
- ColumnIsTimeError,
- IllegalSchemaModificationError,
- NonNumericColumnError,
- UnknownColumnNameError,
-)
-
-if TYPE_CHECKING:
- from collections.abc import Callable, Mapping, Sequence
- from pathlib import Path
- from typing import Any
-
-
-class TimeSeries(Table):
-
- # ------------------------------------------------------------------------------------------------------------------
- # Creation
- # ------------------------------------------------------------------------------------------------------------------
-
- @staticmethod
- def timeseries_from_csv_file(
- path: str | Path,
- target_name: str,
- time_name: str,
- feature_names: list[str] | None = None,
- ) -> TimeSeries:
- """
- Read data from a CSV file into a table.
-
- Parameters
- ----------
- path:
- The path to the CSV file.
- target_name:
- The name of the target column
- time_name:
- The name of the time column
- feature_names:
- The name(s) of the column(s)
-
- Returns
- -------
- table:
- The time series created from the CSV file.
-
- Raises
- ------
- FileNotFoundError
- If the specified file does not exist.
- WrongFileExtensionError
- If the file is not a csv file.
- UnknownColumnNameError
- If target_name or time_name matches none of the column names.
- Value Error
- If one column is target and feature
- Value Error
- If one column is time and feature
-
- """
- return TimeSeries._from_table(
- Table.from_csv_file(path=path),
- target_name=target_name,
- time_name=time_name,
- feature_names=feature_names,
- )
-
- @staticmethod
- def _from_table(
- table: Table,
- target_name: str,
- time_name: str,
- feature_names: list[str] | None = None,
- ) -> TimeSeries:
- """Create a TimeSeries from a table.
-
- Parameters
- ----------
- table:
- The table.
- target_name:
- Name of the target column.
- time_name:
- Name of the date column.
- feature_names:
- Names of the feature columns. If None, all columns except the target and time columns are used.
-
- Returns
- -------
- time_series:
- the created time series
-
- Raises
- ------
- UnknownColumnNameError
- If target_name or time_name matches none of the column names.
- Value Error
- If one column is target and feature
- Value Error
- If one column is time and feature
-
- Examples
- --------
- >>> from safeds.data.tabular.containers import Table, TimeSeries
- >>> test_table = Table({"date": ["01.01", "01.02", "01.03", "01.04"], "f1": ["a", "b", "c", "a"], "t": [1,2,3,4]})
- >>> timeseries = TimeSeries._from_table(test_table, "t", "date", ["f1"])
- """
- import pandas as pd
-
- table = table._as_table()
- if feature_names is not None and time_name in feature_names:
- raise ValueError(f"Column '{time_name}' can not be time and feature column.")
- if feature_names is not None and target_name in feature_names:
- raise ValueError(f"Column '{target_name}' can not be target and feature column.")
-
- if target_name not in table.column_names:
- raise UnknownColumnNameError([target_name])
- result = object.__new__(TimeSeries)
- result._data = table._data
-
- result._schema = table._schema
- result._time = table.get_column(time_name)
- result._target = table.get_column(target_name)
- # empty Columns have dtype Object
- if len(result._time._data) == 0:
- result._time._data = pd.Series(name=time_name)
- if len(result.target._data) == 0:
- result.target._data = pd.Series(name=target_name)
- if feature_names is None or len(feature_names) == 0:
- result._feature_names = []
- result._features = Table()
- else:
- result._feature_names = feature_names
- result._features = table.keep_only_columns(feature_names)
-
- # check if time column got added as feature column
- return result
-
- # ------------------------------------------------------------------------------------------------------------------
- # Dunder methods
- # ------------------------------------------------------------------------------------------------------------------
-
- def __init__(
- self,
- data: Mapping[str, Sequence[Any]],
- target_name: str,
- time_name: str,
- feature_names: list[str] | None = None,
- ):
- """
- Create a time series from a mapping of column names to their values.
-
- Parameters
- ----------
- data:
- The data.
- target_name:
- Name of the target column.
- time_name:
- Name of the time column
- feature_names:
- Names of the feature columns. If None, all columns except the target and time columns are used.
-
- Raises
- ------
- ColumnLengthMismatchError
- If columns have different lengths.
- ValueError
- If the target column is also a feature column.
- ValueError
- If time column is also a feature column
- UnknownColumnNameError
- If time column does not exist
-
- Examples
- --------
- >>> from safeds.data.tabular.containers import TimeSeries
- >>> table = TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", "a")
- """
- import pandas as pd
-
- # Enable copy-on-write for pandas dataframes
- pd.options.mode.copy_on_write = True
-
- # Validate inputs
- super().__init__(data)
- _data: Table = Table(data)
- if feature_names is None:
- self._features = Table()
- self._feature_names = []
- feature_names = []
- else:
- self._feature_names = feature_names
- self._features = _data.keep_only_columns(feature_names)
- if time_name in feature_names:
- raise ValueError(f"Column '{time_name}' can not be time and feature column.")
- if target_name in feature_names:
- raise ValueError(f"Column '{target_name}' can not be time and feature column.")
- if time_name not in _data.column_names:
- raise UnknownColumnNameError([time_name])
- self._time: Column = _data.get_column(time_name)
- self._target: Column = _data.get_column(target_name)
- # empty Columns have dtype Object
- if len(self._time._data) == 0:
- self._time._data = pd.Series(name=time_name)
- if len(self.target._data) == 0:
- self.target._data = pd.Series(name=target_name)
-
- self._data = _data._data
-
- def __eq__(self, other: object) -> bool:
- """
- Compare two time series instances.
-
- Returns
- -------
- equals:
- 'True' if contents are equal, 'False' otherwise.
- """
- if not isinstance(other, TimeSeries):
- return NotImplemented
- if self is other:
- return True
-
- return (
- self.time == other.time
- and self.target == other.target
- and self.features == other.features
- and Table.__eq__(self, other)
- )
-
- def __hash__(self) -> int:
- """
- Return a deterministic hash value for this time series.
-
- Returns
- -------
- hash:
- The hash value.
- """
- return _structural_hash(self.time, self.target, self.features, Table.__hash__(self))
-
- def __sizeof__(self) -> int:
- """
- Return the complete size of this object.
-
- Returns
- -------
- size:
- Size of this object in bytes.
- """
- return Table.__sizeof__(self) + sys.getsizeof(self._time)
-
- # ------------------------------------------------------------------------------------------------------------------
- # Properties
- # ------------------------------------------------------------------------------------------------------------------
-
- @property
- def target(self) -> Column:
- """
- Get the target column of the time series.
-
- Returns
- -------
- target:
- The target column.
- """
- return self._target
-
- @property
- def features(self) -> Table:
- """
- Get the feature columns of the time series.
-
- Returns
- -------
- features:
- The table containing the feature columns.
- """
- return self._features
-
- @property
- def time(self) -> Column:
- """
- Get the time column of the time series.
-
- Returns
- -------
- time:
- The time column.
- """
- return self._time
-
- # ------------------------------------------------------------------------------------------------------------------
- # Overridden methods from Table class
- # ------------------------------------------------------------------------------------------------------------------
- def _as_table(self: TimeSeries) -> Table:
- """
- Return a new plain `Table`.
-
- The original time series is not modified.
-
- Parameters
- ----------
- self:
- The Time Series.
-
- Returns
- -------
- table:
- The time series as an plain Table, i.e. without the information about which columns are features, target or
- time.
-
- """
- return Table.from_columns(super().to_columns())
-
- def add_column(self, column: Column) -> TimeSeries:
- """
- Return a new `TimeSeries` with the provided column attached at the end, as neither target nor feature column.
-
- The original time series is not modified.
-
- Parameters
- ----------
- column:
- The column to be added.
-
- Returns
- -------
- result:
- The time series with the column attached as neither target nor feature column.
-
- Raises
- ------
- DuplicateColumnNameError
- If the new column already exists.
- ColumnSizeError
- If the size of the column does not match the number of rows.
- """
- return TimeSeries._from_table(
- super().add_column(column),
- time_name=self.time.name,
- target_name=self._target.name,
- )
-
- def add_column_as_feature(self, column: Column) -> TimeSeries:
- """
- Return a new `TimeSeries` with the provided column attached at the end, as a feature column.
-
- the original time series is not modified.
-
- Parameters
- ----------
- column:
- The column to be added.
-
- Returns
- -------
- result:
- The time series with the attached feature column.
-
- Raises
- ------
- DuplicateColumnNameError
- If the new column already exists.
- ColumnSizeError
- If the size of the column does not match the number of rows.
- """
- return TimeSeries._from_table(
- super().add_column(column),
- target_name=self._target.name,
- time_name=self.time.name,
- feature_names=[*self._feature_names, column.name],
- )
-
- def add_columns_as_features(self, columns: list[Column] | Table) -> TimeSeries:
- """
- Return a new `TimeSeries` with the provided columns attached at the end, as feature columns.
-
- The original time series is not modified.
-
- Parameters
- ----------
- columns:
- The columns to be added as features.
-
- Returns
- -------
- result:
- The time series with the attached feature columns.
-
- Raises
- ------
- DuplicateColumnNameError
- If any of the new feature columns already exist.
- ColumnSizeError
- If the size of any feature column does not match the number of rows.
- """
- return TimeSeries._from_table(
- super().add_columns(columns),
- time_name=self.time.name,
- target_name=self._target.name,
- feature_names=self._feature_names
- + [col.name for col in (columns.to_columns() if isinstance(columns, Table) else columns)],
- )
-
- def add_columns(self, columns: list[Column] | Table) -> TimeSeries:
- """
- Return a new `TimeSeries` with multiple added columns, as neither target nor feature columns.
-
- The original time series is not modified.
-
- Parameters
- ----------
- columns:
- The columns to be added.
-
- Returns
- -------
- result:
- A new time series combining the original table and the given columns as neither target nor feature columns.
-
- Raises
- ------
- DuplicateColumnNameError
- If at least one column name from the provided column list already exists in the time series.
- ColumnSizeError
- If at least one of the column sizes from the provided column list does not match the time series.
- """
- return TimeSeries._from_table(
- super().add_columns(columns),
- time_name=self.time.name,
- target_name=self._target.name,
- feature_names=self._feature_names,
- )
-
- def add_row(self, row: Row) -> TimeSeries:
- """
- Return a new `TimeSeries` with an extra Row attached.
-
- The original time series is not modified.
-
- Parameters
- ----------
- row:
- The row to be added.
-
- Returns
- -------
- table:
- A new time series with the added row at the end.
-
- Raises
- ------
- UnknownColumnNameError
- If the row has different column names than the time series.
- """
- return TimeSeries._from_table(
- super().add_row(row),
- target_name=self._target.name,
- time_name=self.time.name,
- feature_names=self._feature_names,
- )
-
- def add_rows(self, rows: list[Row] | Table) -> TimeSeries:
- """
- Return a new `TimeSeries` with multiple extra Rows attached.
-
- The original time series is not modified.
-
- Parameters
- ----------
- rows:
- The rows to be added.
-
- Returns
- -------
- result:
- A new time series which combines the original time series and the given rows.
-
- Raises
- ------
- UnknownColumnNameError
- If at least one of the rows have different column names than the time series.
- """
- return TimeSeries._from_table(
- super().add_rows(rows),
- target_name=self._target.name,
- time_name=self.time.name,
- feature_names=self._feature_names,
- )
-
- def filter_rows(self, query: Callable[[Row], bool]) -> TimeSeries:
- """
- Return a new `TimeSeries` containing only rows that match the given Callable (e.g. lambda function).
-
- The original time series is not modified.
-
- Parameters
- ----------
- query:
- A Callable that is applied to all rows.
-
- Returns
- -------
- result:
- A time series containing only the rows to match the query.
- """
- return TimeSeries._from_table(
- super().filter_rows(query),
- target_name=self._target.name,
- time_name=self.time.name,
- feature_names=self._feature_names,
- )
-
- def keep_only_columns(self, column_names: list[str]) -> TimeSeries:
- """
- Return a new `TimeSeries` with only the given column(s).
-
- The original time series is not modified.
-
- Parameters
- ----------
- column_names:
- A list containing the columns to be kept.
-
- Returns
- -------
- table:
- A time series containing only the given column(s).
-
- Raises
- ------
- UnknownColumnNameError
- If any of the given columns does not exist.
- IllegalSchemaModificationError
- If none of the given columns is the target or time column or any of the feature columns.
- """
- if self._target.name not in column_names:
- raise IllegalSchemaModificationError("Must keep the target column.")
- if self.time.name not in column_names:
- raise IllegalSchemaModificationError("Must keep the time column.")
- return TimeSeries._from_table(
- super().keep_only_columns(column_names),
- target_name=self._target.name,
- time_name=self.time.name,
- feature_names=sorted(
- set(self._feature_names).intersection(set(column_names)),
- key={val: ix for ix, val in enumerate(self._feature_names)}.__getitem__,
- ),
- )
-
- def remove_columns(self, column_names: list[str]) -> TimeSeries:
- """
- Return a new `TimeSeries` with the given column(s) removed from the time series.
-
- The original time series is not modified.
-
- Parameters
- ----------
- column_names:
- The names of all columns to be dropped.
-
- Returns
- -------
- table:
- A time series without the given columns.
-
- Raises
- ------
- UnknownColumnNameError
- If any of the given columns does not exist.
- ColumnIsTargetError
- If any of the given columns is the target column.
- ColumnIsTimeError
- If any of the given columns is the time column.
- IllegalSchemaModificationError
- If the given columns contain all the feature columns.
- """
- if self._target.name in column_names:
- raise ColumnIsTargetError(self._target.name)
- if self.time.name in column_names:
- raise ColumnIsTimeError(self.time.name)
- return TimeSeries._from_table(
- super().remove_columns(column_names),
- target_name=self._target.name,
- time_name=self.time.name,
- feature_names=sorted(
- set(self._feature_names) - set(column_names),
- key={val: ix for ix, val in enumerate(self._feature_names)}.__getitem__,
- ),
- )
-
- def remove_columns_with_missing_values(self) -> TimeSeries:
- """
- Return a new `TimeSeries` with every column that misses values removed.
-
- The original time series is not modified.
-
- Returns
- -------
- table:
- A time series without the columns that contain missing values.
-
- Raises
- ------
- ColumnIsTargetError
- If any of the columns to be removed is the target column.
- ColumnIsTimeError
- If any of the columns to be removed is the time column.
- IllegalSchemaModificationError
- If the columns to remove contain all the feature columns.
- """
- table = super().remove_columns_with_missing_values()
- if self._target.name not in table.column_names:
- raise ColumnIsTargetError(self._target.name)
- if self.time.name not in table.column_names:
- raise ColumnIsTimeError(self.time.name)
- return TimeSeries._from_table(
- table,
- target_name=self._target.name,
- time_name=self._time.name,
- feature_names=sorted(
- set(self._feature_names).intersection(set(table.column_names)),
- key={val: ix for ix, val in enumerate(self._feature_names)}.__getitem__,
- ),
- )
-
- def remove_columns_with_non_numerical_values(self) -> TimeSeries:
- """
- Return a new `TimeSeries` with every column that contains non-numerical values removed.
-
- The original time series is not modified.
-
- Returns
- -------
- table:
- A time series without the columns that contain non-numerical values.
-
- Raises
- ------
- ColumnIsTargetError
- If any of the columns to be removed is the target column.
- ColumnIsTimeError
- If any of the columns to be removed is the time column.
- IllegalSchemaModificationError
- If the columns to remove contain all the feature columns.
- """
- table = super().remove_columns_with_non_numerical_values()
- if self._target.name not in table.column_names:
- raise ColumnIsTargetError(self._target.name)
- if self.time.name not in table.column_names:
- raise ColumnIsTimeError(self.time.name)
- return TimeSeries._from_table(
- table,
- self._target.name,
- time_name=self.time.name,
- feature_names=sorted(
- set(self._feature_names).intersection(set(table.column_names)),
- key={val: ix for ix, val in enumerate(self._feature_names)}.__getitem__,
- ),
- )
-
- def remove_duplicate_rows(self) -> TimeSeries:
- """
- Return a new `TimeSeries` with all row duplicates removed.
-
- The original time series is not modified.
-
- Returns
- -------
- result:
- The time series with the duplicate rows removed.
- """
- return TimeSeries._from_table(
- super().remove_duplicate_rows(),
- target_name=self._target.name,
- feature_names=self._feature_names,
- time_name=self.time.name,
- )
-
- def remove_rows_with_missing_values(self) -> TimeSeries:
- """
- Return a new `TimeSeries` without the rows that contain missing values.
-
- The original time series is not modified.
-
- Returns
- -------
- table:
- A time series without the rows that contain missing values.
- """
- return TimeSeries._from_table(
- super().remove_rows_with_missing_values(),
- time_name=self.time.name,
- target_name=self._target.name,
- feature_names=self._feature_names,
- )
-
- def remove_rows_with_outliers(self) -> TimeSeries:
- """
- Return a new `TimeSeries` with all rows that contain at least one outlier removed.
-
- We define an outlier as a value that has a distance of more than 3 standard deviations from the column mean.
- Missing values are not considered outliers. They are also ignored during the calculation of the standard
- deviation.
-
- The original time series is not modified.
-
- Returns
- -------
- new_time_series:
- A new time series without rows containing outliers.
- """
- return TimeSeries._from_table(
- super().remove_rows_with_outliers(),
- time_name=self.time.name,
- target_name=self._target.name,
- feature_names=self._feature_names,
- )
-
- def rename_column(self, old_name: str, new_name: str) -> TimeSeries:
- """
- Return a new `TimeSeries` with a single column renamed.
-
- The original time series is not modified.
-
- Parameters
- ----------
- old_name:
- The old name of the column.
- new_name:
- The new name of the column.
-
- Returns
- -------
- table:
- The time series with the renamed column.
-
- Raises
- ------
- UnknownColumnNameError
- If the specified old target column name does not exist.
- DuplicateColumnNameError
- If the specified new target column name already exists.
- """
- return TimeSeries._from_table(
- super().rename_column(old_name, new_name),
- time_name=new_name if self.time.name == old_name else self.time.name,
- target_name=new_name if self._target.name == old_name else self._target.name,
- feature_names=(
- self._feature_names
- if old_name not in self._feature_names
- else [column_name if column_name != old_name else new_name for column_name in self._feature_names]
- ),
- )
-
- def replace_column(self, old_column_name: str, new_columns: list[Column]) -> TimeSeries:
- """
- Return a new `TimeSeries` with the specified old column replaced by a list of new columns.
-
- If the column to be replaced is the target or time column, it must be replaced by exactly one column. That column
- becomes the new target or time column. If the column to be replaced is a feature column, the new columns that replace it
- all become feature columns.
-
- The order of columns is kept. The original time series is not modified.
-
- Parameters
- ----------
- old_column_name:
- The name of the column to be replaced.
- new_columns:
- The new columns replacing the old column.
-
- Returns
- -------
- result:
- A time series with the old column replaced by the new columns.
-
- Raises
- ------
- UnknownColumnNameError
- If the old column does not exist.
- DuplicateColumnNameError
- If the new column already exists and the existing column is not affected by the replacement.
- ColumnSizeError
- If the size of the column does not match the amount of rows.
- IllegalSchemaModificationError
- If the target or time column would be removed or replaced by more than one column.
- """
- if old_column_name == self.time.name:
- if len(new_columns) != 1:
- raise IllegalSchemaModificationError(
- f'Time column "{self.time.name}" can only be replaced by exactly one new column.',
- )
- else:
- return TimeSeries._from_table(
- super().replace_column(old_column_name, new_columns),
- target_name=self._target.name,
- feature_names=self._feature_names,
- time_name=new_columns[0].name,
- )
- if old_column_name == self._target.name:
- if len(new_columns) != 1:
- raise IllegalSchemaModificationError(
- f'Target column "{self._target.name}" can only be replaced by exactly one new column.',
- )
- else:
- return TimeSeries._from_table(
- super().replace_column(old_column_name, new_columns),
- target_name=new_columns[0].name,
- time_name=self.time.name,
- feature_names=self._feature_names,
- )
-
- else:
- return TimeSeries._from_table(
- super().replace_column(old_column_name, new_columns),
- target_name=self._target.name,
- time_name=self.time.name,
- feature_names=(
- self._feature_names
- if old_column_name not in self._feature_names
- else self._feature_names[: self._feature_names.index(old_column_name)]
- + [col.name for col in new_columns]
- + self._feature_names[self._feature_names.index(old_column_name) + 1 :]
- ),
- )
-
- def slice_rows(
- self,
- start: int | None = None,
- end: int | None = None,
- step: int = 1,
- ) -> TimeSeries:
- """
- Slice a part of the table into a new `TimeSeries`.
-
- The original time series is not modified.
-
- Parameters
- ----------
- start:
- The first index of the range to be copied into a new time series, None by default.
- end:
- The last index of the range to be copied into a new time series, None by default.
- step:
- The step size used to iterate through the time series, 1 by default.
-
- Returns
- -------
- result:
- The resulting time series.
-
- Raises
- ------
- IndexOutOfBoundsError
- If the index is out of bounds.
- """
- return TimeSeries._from_table(
- super().slice_rows(start, end, step),
- target_name=self._target.name,
- feature_names=self._feature_names,
- time_name=self.time.name,
- )
-
- def sort_columns(
- self,
- comparator: Callable[[Column, Column], int] = lambda col1, col2: (col1.name > col2.name)
- - (col1.name < col2.name),
- ) -> TimeSeries:
- """
- Sort the columns of a `TimeSeries` with the given comparator and return a new `TimeSeries`.
-
- The comparator is a function that takes two columns `col1` and `col2` and
- returns an integer:
-
- * If the function returns a negative number, `col1` will be ordered before `col2`.
- * If the function returns a positive number, `col1` will be ordered after `col2`.
- * If the function returns 0, the original order of `col1` and `col2` will be kept.
-
- If no comparator is given, the columns will be sorted alphabetically by their name.
-
- The original time series is not modified.
-
- Parameters
- ----------
- comparator:
- The function used to compare two columns.
-
- Returns
- -------
- new_time_series:
- A new time series with sorted columns.
- """
- sorted_table = super().sort_columns(comparator)
- return TimeSeries._from_table(
- sorted_table,
- time_name=self.time.name,
- target_name=self._target.name,
- feature_names=sorted(
- set(sorted_table.column_names).intersection(self._feature_names),
- key={val: ix for ix, val in enumerate(sorted_table.column_names)}.__getitem__,
- ),
- )
-
- def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> TimeSeries:
- """
- Return a new `TimeSeries` with the provided column transformed by calling the provided transformer.
-
- The original time series is not modified.
-
- Parameters
- ----------
- name:
- The name of the column to be transformed.
- transformer:
- The transformer to the given column
-
- Returns
- -------
- result:
- The time series with the transformed column.
-
- Raises
- ------
- UnknownColumnNameError
- If the column does not exist.
- """
- return TimeSeries._from_table(
- super().transform_column(name, transformer),
- time_name=self.time.name,
- target_name=self._target.name,
- feature_names=self._feature_names,
- )
-
- def plot_lagplot(self, lag: int) -> Image:
- """
- Plot a lagplot for the target column.
-
- Parameters
- ----------
- lag:
- The amount of lag used to plot
-
- Returns
- -------
- plot:
- The plot as an image.
-
- Raises
- ------
- NonNumericColumnError
- If the time series targets contains non-numerical values.
-
- Examples
- --------
- >>> from safeds.data.tabular.containers import TimeSeries
- >>> table = TimeSeries({"time":[1, 2], "target": [3, 4], "feature":[2,2]}, target_name= "target", time_name="time", feature_names=["feature"], )
- >>> image = table.plot_lagplot(lag = 1)
- """
- import matplotlib.pyplot as plt
- import pandas as pd
-
- if not self._target.type.is_numeric():
- raise NonNumericColumnError("This time series target contains non-numerical columns.")
- ax = pd.plotting.lag_plot(self._target._data, lag=lag)
- fig = ax.figure
- buffer = io.BytesIO()
- fig.savefig(buffer, format="png")
- plt.close() # Prevents the figure from being displayed directly
- buffer.seek(0)
- return Image.from_bytes(buffer.read())
-
- def plot_lineplot(self, x_column_name: str | None = None, y_column_name: str | None = None) -> Image:
- """
-
- Plot the time series target or the given column(s) as line plot.
-
- The function will take the time column as the default value for y_column_name and the target column as the
- default value for x_column_name.
-
- Parameters
- ----------
- x_column_name:
- The column name of the column to be plotted on the x-Axis, default is the time column.
- y_column_name:
- The column name of the column to be plotted on the y-Axis, default is the target column.
-
- Returns
- -------
- plot:
- The plot as an image.
-
- Raises
- ------
- NonNumericColumnError
- If the time series given columns contain non-numerical values.
-
- UnknownColumnNameError
- If one of the given names does not exist in the table
-
- Examples
- --------
- >>> from safeds.data.tabular.containers import TimeSeries
- >>> table = TimeSeries({"time":[1, 2], "target": [3, 4], "feature":[2,2]}, target_name= "target", time_name="time", feature_names=["feature"], )
- >>> image = table.plot_lineplot()
- """
- import matplotlib.pyplot as plt
- import seaborn as sns
-
- self._data.index.name = "index"
- if x_column_name is not None and not self.get_column(x_column_name).type.is_numeric():
- raise NonNumericColumnError("The time series plotted column contains non-numerical columns.")
-
- if y_column_name is None:
- y_column_name = self._target.name
-
- elif y_column_name not in self._data.columns:
- raise UnknownColumnNameError([y_column_name])
-
- if x_column_name is None:
- x_column_name = self.time.name
-
- if not self.get_column(y_column_name).type.is_numeric():
- raise NonNumericColumnError("The time series plotted column contains non-numerical columns.")
-
- fig = plt.figure()
- ax = sns.lineplot(
- data=self._data,
- x=x_column_name,
- y=y_column_name,
- )
- ax.set(xlabel=x_column_name, ylabel=y_column_name)
- ax.set_xticks(ax.get_xticks())
- ax.set_xticklabels(
- ax.get_xticklabels(),
- rotation=45,
- horizontalalignment="right",
- ) # rotate the labels of the x Axis to prevent the chance of overlapping of the labels
- plt.tight_layout()
-
- buffer = io.BytesIO()
- fig.savefig(buffer, format="png")
- plt.close() # Prevents the figure from being displayed directly
- buffer.seek(0)
- self._data = self._data.reset_index()
- return Image.from_bytes(buffer.read())
-
- def plot_scatterplot(
- self,
- x_column_name: str | None = None,
- y_column_name: str | None = None,
- ) -> Image:
- """
- Plot the time series target or the given column(s) as scatter plot.
-
- The function will take the time column as the default value for x_column_name and the target column as the
- default value for y_column_name.
-
- Parameters
- ----------
- x_column_name:
- The column name of the column to be plotted on the x-Axis.
- y_column_name:
- The column name of the column to be plotted on the y-Axis.
-
- Returns
- -------
- plot:
- The plot as an image.
-
- Raises
- ------
- NonNumericColumnError
- If the time series given columns contain non-numerical values.
-
- UnknownColumnNameError
- If one of the given names does not exist in the table
-
- Examples
- --------
- >>> from safeds.data.tabular.containers import TimeSeries
- >>> table = TimeSeries({"time":[1, 2], "target": [3, 4], "feature":[2,2]}, target_name= "target", time_name="time", feature_names=["feature"], )
- >>> image = table.plot_scatterplot()
-
- """
- import matplotlib.pyplot as plt
- import seaborn as sns
-
- self._data.index.name = "index"
- if x_column_name is not None and not self.get_column(x_column_name).type.is_numeric():
- raise NonNumericColumnError("The time series plotted column contains non-numerical columns.")
-
- if y_column_name is None:
- y_column_name = self._target.name
- elif y_column_name not in self._data.columns:
- raise UnknownColumnNameError([y_column_name])
- if x_column_name is None:
- x_column_name = self.time.name
-
- if not self.get_column(y_column_name).type.is_numeric():
- raise NonNumericColumnError("The time series plotted column contains non-numerical columns.")
-
- fig = plt.figure()
- ax = sns.scatterplot(
- data=self._data,
- x=x_column_name,
- y=y_column_name,
- )
- ax.set(xlabel=x_column_name, ylabel=y_column_name)
- ax.set_xticks(ax.get_xticks())
- ax.set_xticklabels(
- ax.get_xticklabels(),
- rotation=45,
- horizontalalignment="right",
- ) # rotate the labels of the x Axis to prevent the chance of overlapping of the labels
- plt.tight_layout()
-
- buffer = io.BytesIO()
- fig.savefig(buffer, format="png")
- plt.close() # Prevents the figure from being displayed directly
- buffer.seek(0)
- self._data = self._data.reset_index()
- return Image.from_bytes(buffer.read())
-
- def split_rows(self, percentage_in_first: float) -> tuple[TimeSeries, TimeSeries]:
- """
- Split the table into two new tables.
-
- The original time series is not modified.
-
- Parameters
- ----------
- percentage_in_first:
- The desired size of the first time series in percentage to the given time series; must be between 0 and 1.
-
- Returns
- -------
- result:
- A tuple containing the two resulting time series. The first time series has the specified size, the second time series
- contains the rest of the data.
-
- Raises
- ------
- ValueError:
- if the 'percentage_in_first' is not between 0 and 1.
-
- Examples
- --------
- >>> from safeds.data.tabular.containers import TimeSeries
- >>> time_series = TimeSeries({"time":[0, 1, 2, 3, 4], "temperature": [10, 15, 20, 25, 30], "sales": [54, 74, 90, 206, 210]}, time_name="time", target_name="sales")
- >>> slices = time_series.split_rows(0.4)
- >>> slices[0]
- time temperature sales
- 0 0 10 54
- 1 1 15 74
- >>> slices[1]
- time temperature sales
- 0 2 20 90
- 1 3 25 206
- 2 4 30 210
- """
- temp = self._as_table()
- t1, t2 = temp.split_rows(percentage_in_first=percentage_in_first)
- return (
- TimeSeries._from_table(
- t1,
- time_name=self.time.name,
- target_name=self._target.name,
- feature_names=self._feature_names,
- ),
- TimeSeries._from_table(
- t2,
- time_name=self.time.name,
- target_name=self._target.name,
- feature_names=self._feature_names,
- ),
- )
-
- def plot_compare_time_series(self, time_series: list[TimeSeries]) -> Image:
- """
- Plot the given time series targets along the time on the x-axis.
-
- Parameters
- ----------
- time_series:
- A list of time series to be plotted.
-
- Returns
- -------
- plot:
- A plot with all the time series targets plotted by the time on the x-axis.
-
- Raises
- ------
- NonNumericColumnError
- if the target column contains non numerical values
- """
- import matplotlib.pyplot as plt
- import pandas as pd
- import seaborn as sns
-
- if not self._target.type.is_numeric():
- raise NonNumericColumnError("The time series plotted column contains non-numerical columns.")
-
- data = pd.DataFrame()
- data[self.time.name] = self.time._data
- data[self.target.name] = self.target._data
- for index, ts in enumerate(time_series):
- if not ts.target.type.is_numeric():
- raise NonNumericColumnError("The time series plotted column contains non-numerical columns.")
- data[ts.target.name + " " + str(index)] = ts.target._data
- fig = plt.figure()
-
- data = pd.melt(data, [self.time.name])
- sns.lineplot(x=self.time.name, y="value", hue="variable", data=data)
- plt.title("Multiple Series Plot")
- plt.xlabel("Time")
-
- plt.tight_layout()
- buffer = io.BytesIO()
- fig.savefig(buffer, format="png")
- plt.close() # Prevents the figure from being displayed directly
- buffer.seek(0)
- self._data = self._data.reset_index()
- return Image.from_bytes(buffer.read())
diff --git a/src/safeds/data/tabular/transformation/_table_transformer.py b/src/safeds/data/tabular/transformation/_table_transformer.py
index bdf6bb861..277543c6a 100644
--- a/src/safeds/data/tabular/transformation/_table_transformer.py
+++ b/src/safeds/data/tabular/transformation/_table_transformer.py
@@ -137,10 +137,8 @@ def fit_and_transform(self, table: Table, column_names: list[str] | None = None)
Returns
-------
- fitted_transformer:
- The fitted transformer.
- transformed_table:
- The transformed table.
+ fitted_transformer, transformed_table:
+ The fitted transformer and the transformed table.
"""
fitted_transformer = self.fit(table, column_names)
transformed_table = fitted_transformer.transform(table)
diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py
index cc8366298..7a654021b 100644
--- a/src/safeds/exceptions/__init__.py
+++ b/src/safeds/exceptions/__init__.py
@@ -6,14 +6,11 @@
if TYPE_CHECKING:
from safeds.exceptions._data import (
- ColumnIsTargetError,
- ColumnIsTimeError,
ColumnLengthMismatchError,
ColumnSizeError,
DuplicateColumnNameError,
DuplicateIndexError,
IllegalFormatError,
- IllegalSchemaModificationError,
IndexOutOfBoundsError,
MissingValuesColumnError,
NonNumericColumnError,
@@ -37,7 +34,6 @@
InvalidModelStructureError,
LearningError,
ModelNotFittedError,
- NonTimeSeriesError,
PlainTableError,
PredictionError,
)
@@ -72,7 +68,6 @@
"InvalidModelStructureError": "._ml:InvalidModelStructureError",
"LearningError": "._ml:LearningError",
"ModelNotFittedError": "._ml:ModelNotFittedError",
- "NonTimeSeriesError": "._ml:NonTimeSeriesError",
"PlainTableError": "._ml:PlainTableError",
"PredictionError": "._ml:PredictionError",
# Other
@@ -86,14 +81,11 @@
# Generic exceptions
"OutOfBoundsError",
# Data exceptions
- "ColumnIsTargetError",
- "ColumnIsTimeError",
"ColumnLengthMismatchError",
"ColumnSizeError",
"DuplicateColumnNameError",
"DuplicateIndexError",
"IllegalFormatError",
- "IllegalSchemaModificationError",
"IndexOutOfBoundsError",
"MissingValuesColumnError",
"NonNumericColumnError",
@@ -110,7 +102,6 @@
"InvalidModelStructureError",
"LearningError",
"ModelNotFittedError",
- "NonTimeSeriesError",
"PlainTableError",
"PredictionError",
# Other
diff --git a/src/safeds/exceptions/_data.py b/src/safeds/exceptions/_data.py
index a060f21aa..0d9303a76 100644
--- a/src/safeds/exceptions/_data.py
+++ b/src/safeds/exceptions/_data.py
@@ -163,27 +163,6 @@ def __init__(self, file: str | Path, file_extension: str | list[str]) -> None:
)
-class IllegalSchemaModificationError(Exception):
- """Exception raised when modifying a schema in a way that is inconsistent with the subclass's requirements."""
-
- def __init__(self, msg: str) -> None:
- super().__init__(f"Illegal schema modification: {msg}")
-
-
-class ColumnIsTargetError(IllegalSchemaModificationError):
- """Exception raised when removing the target column of a TimeSeries."""
-
- def __init__(self, column_name: str) -> None:
- super().__init__(f'Column "{column_name}" is the target column and cannot be removed.')
-
-
-class ColumnIsTimeError(IllegalSchemaModificationError):
- """Exception raised when removing the time column of a TimeSeries."""
-
- def __init__(self, column_name: str) -> None:
- super().__init__(f'Column "{column_name}" is the time column and cannot be removed.')
-
-
class IllegalFormatError(Exception):
"""Exception raised when a format is not legal."""
diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py
index d87960f94..5b920153b 100644
--- a/src/safeds/exceptions/_ml.py
+++ b/src/safeds/exceptions/_ml.py
@@ -93,15 +93,3 @@ def __init__(self) -> None:
"Use `Table.to_tabular_dataset()` to create a tabular dataset."
),
)
-
-
-class NonTimeSeriesError(Exception):
- """Raised when a table is used instead of a TimeSeries in a regression or classification."""
-
- def __init__(self) -> None:
- super().__init__(
- (
- "This method needs a time series.\nA time series is a table that additionally knows which columns are"
- " time and which are the target to predict.\n"
- ),
- )
diff --git a/src/safeds/ml/classical/regression/_arima.py b/src/safeds/ml/classical/regression/_arima.py
index f35e066fc..a5c9de89f 100644
--- a/src/safeds/ml/classical/regression/_arima.py
+++ b/src/safeds/ml/classical/regression/_arima.py
@@ -6,13 +6,13 @@
from safeds._utils import _structural_hash
from safeds.data.image.containers import Image
-from safeds.data.tabular.containers import Column, Table, TimeSeries
+from safeds.data.labeled.containers import TimeSeriesDataset
+from safeds.data.tabular.containers import Column
from safeds.exceptions import (
DatasetMissesDataError,
MissingValuesColumnError,
ModelNotFittedError,
NonNumericColumnError,
- NonTimeSeriesError,
)
if TYPE_CHECKING:
@@ -39,7 +39,7 @@ def __init__(self) -> None:
self._order: tuple[int, int, int] | None = None
self._fitted = False
- def fit(self, time_series: TimeSeries) -> ArimaModelRegressor:
+ def fit(self, time_series: TimeSeriesDataset) -> ArimaModelRegressor:
"""
Create a copy of this ARIMA Model and fit it with the given training data.
@@ -70,9 +70,8 @@ def fit(self, time_series: TimeSeries) -> ArimaModelRegressor:
"""
from statsmodels.tsa.arima.model import ARIMA
- if not isinstance(time_series, TimeSeries) and isinstance(time_series, Table):
- raise NonTimeSeriesError
- if time_series.number_of_rows == 0:
+ table = time_series.to_table()
+ if table.number_of_rows == 0:
raise DatasetMissesDataError
if not time_series.target.type.is_numeric():
raise NonNumericColumnError(time_series.target.name)
@@ -109,7 +108,7 @@ def fit(self, time_series: TimeSeries) -> ArimaModelRegressor:
fitted_arima._fitted = True
return fitted_arima
- def predict(self, time_series: TimeSeries) -> TimeSeries:
+ def predict(self, time_series: TimeSeriesDataset) -> TimeSeriesDataset:
"""
Predict a target vector using a time series target column. The model has to be trained first.
@@ -134,7 +133,7 @@ def predict(self, time_series: TimeSeries) -> TimeSeries:
"""
# make a table without
forecast_horizon = len(time_series.target._data)
- result_table = time_series._as_table()
+ result_table = time_series.to_table()
result_table = result_table.remove_columns([time_series.target.name])
# Validation
if not self.is_fitted or self._arima is None:
@@ -147,14 +146,13 @@ def predict(self, time_series: TimeSeries) -> TimeSeries:
# create new TimeSeries
result_table = result_table.add_column(target_column)
- return TimeSeries._from_table(
- result_table,
- time_name=time_series.time.name,
+ return result_table.to_time_series_dataset(
target_name=time_series.target.name + " " + "forecasted",
- feature_names=time_series.features.column_names,
+ time_name=time_series.time.name,
+ extra_names=time_series.extras.column_names,
)
- def plot_predictions(self, test_series: TimeSeries) -> Image:
+ def plot_predictions(self, test_series: TimeSeriesDataset) -> Image:
"""
Plot the predictions of the trained model to the given target of the time series. So you can see the predictions and the actual values in one plot.
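For context, a hedged usage sketch of the reworked ArimaModelRegressor, which now consumes and returns TimeSeriesDataset. Column names and values are illustrative, and the import path assumes the class is exported from safeds.ml.classical.regression as before:

    from safeds.data.labeled.containers import TimeSeriesDataset
    from safeds.ml.classical.regression import ArimaModelRegressor

    train = TimeSeriesDataset(
        {"day": list(range(20)), "demand": [float(i % 5) for i in range(20)]},
        target_name="demand",
        time_name="day",
    )
    fitted = ArimaModelRegressor().fit(train)
    # predict() returns a TimeSeriesDataset whose target column is named "demand forecasted"
    forecast = fitted.predict(train)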
diff --git a/src/safeds/ml/nn/__init__.py b/src/safeds/ml/nn/__init__.py
index e43ad88fb..a2ceb7ac5 100644
--- a/src/safeds/ml/nn/__init__.py
+++ b/src/safeds/ml/nn/__init__.py
@@ -11,7 +11,9 @@
from ._input_conversion import InputConversion
from ._input_conversion_image import InputConversionImage
from ._input_conversion_table import InputConversionTable
+ from ._input_conversion_time_series import InputConversionTimeSeries
from ._layer import Layer
+ from ._lstm_layer import LSTMLayer
from ._model import NeuralNetworkClassifier, NeuralNetworkRegressor
from ._output_conversion import OutputConversion
from ._output_conversion_image import (
@@ -20,6 +22,7 @@
OutputConversionImageToTable,
)
from ._output_conversion_table import OutputConversionTable
+ from ._output_conversion_time_series import OutputConversionTimeSeries
from ._pooling2d_layer import AvgPooling2DLayer, MaxPooling2DLayer
apipkg.initpkg(
@@ -34,14 +37,17 @@
"InputConversionImage": "._input_conversion_image:InputConversionImage",
"InputConversionTable": "._input_conversion_table:InputConversionTable",
"Layer": "._layer:Layer",
+ "OutputConversion": "._output_conversion:OutputConversion",
+ "InputConversionTimeSeries": "._input_conversion_time_series:InputConversionTimeSeries",
+ "LSTMLayer": "._lstm_layer:LSTMLayer",
+ "OutputConversionTable": "._output_conversion_table:OutputConversionTable",
+ "OutputConversionTimeSeries": "._output_conversion_time_series:OutputConversionTimeSeries",
"MaxPooling2DLayer": "._pooling2d_layer:MaxPooling2DLayer",
"NeuralNetworkClassifier": "._model:NeuralNetworkClassifier",
"NeuralNetworkRegressor": "._model:NeuralNetworkRegressor",
- "OutputConversion": "._output_conversion:OutputConversion",
"OutputConversionImageToColumn": "._output_conversion_image:OutputConversionImageToColumn",
"OutputConversionImageToImage": "._output_conversion_image:OutputConversionImageToImage",
"OutputConversionImageToTable": "._output_conversion_image:OutputConversionImageToTable",
- "OutputConversionTable": "._output_conversion_table:OutputConversionTable",
},
)
@@ -56,11 +62,14 @@
"InputConversionTable",
"Layer",
"MaxPooling2DLayer",
+ "OutputConversion",
+ "InputConversionTimeSeries",
+ "LSTMLayer",
+ "OutputConversionTable",
+ "OutputConversionTimeSeries",
"NeuralNetworkClassifier",
"NeuralNetworkRegressor",
- "OutputConversion",
"OutputConversionImageToColumn",
"OutputConversionImageToImage",
"OutputConversionImageToTable",
- "OutputConversionTable",
]
diff --git a/src/safeds/ml/nn/_forward_layer.py b/src/safeds/ml/nn/_forward_layer.py
index baa91c17f..5c3268802 100644
--- a/src/safeds/ml/nn/_forward_layer.py
+++ b/src/safeds/ml/nn/_forward_layer.py
@@ -40,7 +40,7 @@ def forward(self, x: Tensor) -> Tensor:
class ForwardLayer(Layer):
def __init__(self, output_size: int, input_size: int | None = None):
"""
- Create a FNN Layer.
+ Create a Feed Forward Layer.
Parameters
----------
diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py
index 0e2cf952e..b3e1e41a6 100644
--- a/src/safeds/ml/nn/_input_conversion.py
+++ b/src/safeds/ml/nn/_input_conversion.py
@@ -6,15 +6,15 @@
if TYPE_CHECKING:
from torch.utils.data import DataLoader
- from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList
- from safeds.data.image.typing import ImageSize
+from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList
+from safeds.data.image.typing import ImageSize
from safeds.data.image.containers import ImageList
-from safeds.data.labeled.containers import ImageDataset, TabularDataset
-from safeds.data.tabular.containers import Table, TimeSeries
+from safeds.data.labeled.containers import ImageDataset, TabularDataset, TimeSeriesDataset
+from safeds.data.tabular.containers import Table
-FT = TypeVar("FT", TabularDataset, TimeSeries, ImageDataset)
-PT = TypeVar("PT", Table, TimeSeries, ImageList)
+FT = TypeVar("FT", TabularDataset, TimeSeriesDataset, ImageDataset)
+PT = TypeVar("PT", Table, TimeSeriesDataset, ImageList)
class InputConversion(Generic[FT, PT], ABC):
diff --git a/src/safeds/ml/nn/_input_conversion_table.py b/src/safeds/ml/nn/_input_conversion_table.py
index 5ac205ed0..e5c009f56 100644
--- a/src/safeds/ml/nn/_input_conversion_table.py
+++ b/src/safeds/ml/nn/_input_conversion_table.py
@@ -13,19 +13,12 @@
class InputConversionTable(InputConversion[TabularDataset, Table]):
"""The input conversion for a neural network, defines the input parameters for the neural network."""
- def __init__(self, feature_names: list[str], target_name: str) -> None:
- """
- Define the input parameters for the neural network in the input conversion.
-
- Parameters
- ----------
- feature_names:
- The names of the features for the input table, used as features for the training.
- target_name:
- The name of the target for the input table, used as target for the training.
- """
- self._feature_names = feature_names
- self._target_name = target_name
+ def __init__(self) -> None:
+ """Define the input parameters for the neural network in the input conversion."""
+ self._target_name = ""
+ self._time_name = ""
+ self._feature_names: list[str] = []
+ self._first = True
@property
def _data_size(self) -> int:
@@ -41,6 +34,10 @@ def _data_conversion_predict(self, input_data: Table, batch_size: int) -> DataLo
return input_data._into_dataloader(batch_size)
def _is_fit_data_valid(self, input_data: TabularDataset) -> bool:
+ if self._first:
+ self._feature_names = input_data.features.column_names
+ self._target_name = input_data.target.name
+ self._first = False
return (sorted(input_data.features.column_names)).__eq__(sorted(self._feature_names))
def _is_predict_data_valid(self, input_data: Table) -> bool:
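With the parameterless constructor, feature and target names are captured from the first dataset passed to fit instead of being configured up front; the fit-data check now runs before the input-size check (see the _model.py hunk below), so the conversion learns the column names in time. A minimal sketch under that assumption (data and sizes are illustrative, and ForwardLayer is assumed to stay exported from safeds.ml.nn):

    from safeds.data.labeled.containers import TabularDataset
    from safeds.ml.nn import ForwardLayer, InputConversionTable, NeuralNetworkRegressor, OutputConversionTable

    train = TabularDataset({"f1": [1.0, 2.0, 3.0], "t": [2.0, 4.0, 6.0]}, target_name="t")
    model = NeuralNetworkRegressor(
        InputConversionTable(),                       # no feature/target names needed anymore
        [ForwardLayer(input_size=1, output_size=1)],  # one feature column -> input_size 1
        OutputConversionTable(),
    )
    fitted = model.fit(train, epoch_size=1, batch_size=1)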
diff --git a/src/safeds/ml/nn/_input_conversion_time_series.py b/src/safeds/ml/nn/_input_conversion_time_series.py
new file mode 100644
index 000000000..18cf9fb23
--- /dev/null
+++ b/src/safeds/ml/nn/_input_conversion_time_series.py
@@ -0,0 +1,82 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+ from torch.utils.data import DataLoader
+
+from safeds.data.labeled.containers import TimeSeriesDataset
+from safeds.ml.nn._input_conversion import InputConversion
+
+
+class InputConversionTimeSeries(InputConversion[TimeSeriesDataset, TimeSeriesDataset]):
+ """The input conversion for a neural network, defines the input parameters for the neural network."""
+
+ def __init__(
+ self,
+ window_size: int,
+ forecast_horizon: int,
+ ) -> None:
+ """
+ Define the input parameters for the neural network in the input conversion.
+
+ Parameters
+ ----------
+ window_size:
+ The size of the created windows.
+ forecast_horizon:
+ The forecast horizon defines the future lag of the predicted values.
+ """
+ self._window_size = window_size
+ self._forecast_horizon = forecast_horizon
+ self._first = True
+ self._target_name: str = ""
+ self._time_name: str = ""
+ self._feature_names: list[str] = []
+
+ @property
+ def _data_size(self) -> int:
+ """
+ Gives the size for the input of an internal layer.
+
+ Returns
+ -------
+ size:
+ The size of the input for the neural network
+
+ """
+ return (len(self._feature_names) + 1) * self._window_size
+
+ def _data_conversion_fit(
+ self,
+ input_data: TimeSeriesDataset,
+ batch_size: int,
+ num_of_classes: int = 1,
+ ) -> DataLoader:
+ self._num_of_classes = num_of_classes
+ return input_data._into_dataloader_with_window(
+ self._window_size,
+ self._forecast_horizon,
+ batch_size,
+ )
+
+ def _data_conversion_predict(self, input_data: TimeSeriesDataset, batch_size: int) -> DataLoader:
+ return input_data._into_dataloader_with_window_predict(self._window_size, self._forecast_horizon, batch_size)
+
+ def _is_fit_data_valid(self, input_data: TimeSeriesDataset) -> bool:
+ if self._first:
+ self._time_name = input_data.time.name
+ self._feature_names = input_data.features.column_names
+ self._target_name = input_data.target.name
+ self._first = False
+ return (
+ (sorted(input_data.features.column_names)).__eq__(sorted(self._feature_names))
+ and input_data.target.name == self._target_name
+ and input_data.time.name == self._time_name
+ )
+
+ def _is_predict_data_valid(self, input_data: TimeSeriesDataset) -> bool:
+ return self._is_fit_data_valid(input_data)
+
+ def _get_output_configuration(self) -> dict[str, Any]:
+ return {"window_size": self._window_size, "forecast_horizon": self._forecast_horizon}
diff --git a/src/safeds/ml/nn/_lstm_layer.py b/src/safeds/ml/nn/_lstm_layer.py
new file mode 100644
index 000000000..4b7053892
--- /dev/null
+++ b/src/safeds/ml/nn/_lstm_layer.py
@@ -0,0 +1,147 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from safeds.data.image.typing import ImageSize
+
+if TYPE_CHECKING:
+ from torch import Tensor, nn
+
+import sys
+
+from safeds._utils import _structural_hash
+from safeds.exceptions import ClosedBound, OutOfBoundsError
+from safeds.ml.nn import Layer
+
+
+def _create_internal_model(input_size: int, output_size: int, activation_function: str) -> nn.Module:
+ from torch import nn
+
+ class _InternalLayer(nn.Module):
+ def __init__(self, input_size: int, output_size: int, activation_function: str):
+ super().__init__()
+ self._layer = nn.LSTM(input_size, output_size)
+ match activation_function:
+ case "sigmoid":
+ self._fn = nn.Sigmoid()
+ case "relu":
+ self._fn = nn.ReLU()
+ case "softmax":
+ self._fn = nn.Softmax()
+ case "none":
+ self._fn = None
+ case _:
+ raise ValueError("Unknown Activation Function: " + activation_function)
+
+ def forward(self, x: Tensor) -> Tensor:
+ return self._fn(self._layer(x)[0]) if self._fn is not None else self._layer(x)[0]
+
+ return _InternalLayer(input_size, output_size, activation_function)
+
+
+class LSTMLayer(Layer):
+ def __init__(self, output_size: int, input_size: int | None = None):
+ """
+ Create an LSTM Layer.
+
+ Parameters
+ ----------
+ input_size:
+ The number of neurons in the previous layer
+ output_size:
+ The number of neurons in this layer
+
+ Raises
+ ------
+ ValueError
+ If input_size < 1
+ If output_size < 1
+ """
+ if input_size is not None:
+ self._set_input_size(input_size=input_size)
+ if output_size < 1:
+ raise OutOfBoundsError(actual=output_size, name="output_size", lower_bound=ClosedBound(1))
+ self._output_size = output_size
+
+ def _get_internal_layer(self, **kwargs: Any) -> nn.Module:
+ if "activation_function" not in kwargs:
+ raise ValueError(
+ "The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs.",
+ )
+ else:
+ activation_function: str = kwargs["activation_function"]
+ return _create_internal_model(self._input_size, self._output_size, activation_function)
+
+ @property
+ def input_size(self) -> int:
+ """
+ Get the input_size of this layer.
+
+ Returns
+ -------
+ result:
+ The amount of values being passed into this layer.
+ """
+ return self._input_size
+
+ @property
+ def output_size(self) -> int:
+ """
+ Get the output_size of this layer.
+
+ Returns
+ -------
+ result:
+ The Number of Neurons in this layer.
+ """
+ return self._output_size
+
+ def _set_input_size(self, input_size: int | ImageSize) -> None:
+ if isinstance(input_size, ImageSize):
+ raise TypeError("The input_size of a forward layer has to be of type int.")
+ if input_size < 1:
+ raise OutOfBoundsError(actual=input_size, name="input_size", lower_bound=ClosedBound(1))
+ self._input_size = input_size
+
+ def __hash__(self) -> int:
+ """
+ Return a deterministic hash value for this LSTM layer.
+
+ Returns
+ -------
+ hash:
+ the hash value
+ """
+ return _structural_hash(
+ self._input_size,
+ self._output_size,
+ ) # pragma: no cover
+
+ def __eq__(self, other: object) -> bool:
+ """
+ Compare two LSTM layers.
+
+ Parameters
+ ----------
+ other:
+ The lstm layer to compare to.
+
+ Returns
+ -------
+ equals:
+ Whether the two LSTM layers are the same.
+ """
+ if not isinstance(other, LSTMLayer):
+ return NotImplemented
+ return (self is other) or (self._input_size == other._input_size and self._output_size == other._output_size)
+
+ def __sizeof__(self) -> int:
+ """
+ Return the complete size of this object.
+
+ Returns
+ -------
+ size:
+ Size of this object in bytes.
+ """
+ return sys.getsizeof(self._input_size) + sys.getsizeof(self._output_size)
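A brief sketch of the new layer on its own; as with ForwardLayer, input_size may be omitted and is then inferred from the previous layer or the input conversion:

    from safeds.ml.nn import LSTMLayer

    inferred = LSTMLayer(output_size=64)                # input_size set later by the model
    explicit = LSTMLayer(output_size=1, input_size=21)
    assert explicit.input_size == 21
    assert explicit.output_size == 1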
diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py
index e763c09c8..569444b9c 100644
--- a/src/safeds/ml/nn/_model.py
+++ b/src/safeds/ml/nn/_model.py
@@ -4,8 +4,8 @@
from typing import TYPE_CHECKING, Generic, Self, TypeVar
from safeds.data.image.containers import ImageList
-from safeds.data.labeled.containers import ImageDataset, TabularDataset
-from safeds.data.tabular.containers import Table, TimeSeries
+from safeds.data.labeled.containers import TabularDataset, TimeSeriesDataset, ImageDataset
+from safeds.data.tabular.containers import Table
from safeds.exceptions import (
ClosedBound,
FeatureDataMismatchError,
@@ -31,12 +31,15 @@
from torch import Tensor, nn
+ from safeds.ml.nn._input_conversion import InputConversion
+ from safeds.ml.nn._layer import Layer
+ from safeds.ml.nn._output_conversion import OutputConversion
from safeds.data.image.typing import ImageSize
- from safeds.ml.nn import InputConversion, Layer, OutputConversion
-IFT = TypeVar("IFT", TabularDataset, TimeSeries, ImageDataset) # InputFitType
-IPT = TypeVar("IPT", Table, TimeSeries, ImageList) # InputPredictType
-OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset) # OutputType
+
+IFT = TypeVar("IFT", TabularDataset, TimeSeriesDataset, ImageDataset) # InputFitType
+IPT = TypeVar("IPT", Table, TimeSeriesDataset, ImageList) # InputPredictType
+OT = TypeVar("OT", TabularDataset, TimeSeriesDataset, ImageDataset) # OutputType
class NeuralNetworkRegressor(Generic[IFT, IPT, OT]):
@@ -156,14 +159,14 @@ def fit(
import torch
from torch import nn
+ if not self._input_conversion._is_fit_data_valid(train_data):
+ raise FeatureDataMismatchError
if epoch_size < 1:
raise OutOfBoundsError(actual=epoch_size, name="epoch_size", lower_bound=ClosedBound(1))
if batch_size < 1:
raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1))
if self._input_conversion._data_size is not self._input_size:
raise InputSizeError(self._input_conversion._data_size, self._input_size)
- if not self._input_conversion._is_fit_data_valid(train_data):
- raise FeatureDataMismatchError
copied_model = copy.deepcopy(self)
@@ -368,14 +371,14 @@ def fit(
import torch
from torch import nn
+ if not self._input_conversion._is_fit_data_valid(train_data):
+ raise FeatureDataMismatchError
if epoch_size < 1:
raise OutOfBoundsError(actual=epoch_size, name="epoch_size", lower_bound=ClosedBound(1))
if batch_size < 1:
raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1))
if self._input_conversion._data_size is not self._input_size:
raise InputSizeError(self._input_conversion._data_size, self._input_size)
- if not self._input_conversion._is_fit_data_valid(train_data):
- raise FeatureDataMismatchError
copied_model = copy.deepcopy(self)
@@ -391,7 +394,6 @@ def fit(
loss_fn = nn.CrossEntropyLoss()
else:
loss_fn = nn.BCELoss()
-
optimizer = torch.optim.SGD(copied_model._model.parameters(), lr=learning_rate)
for _ in range(epoch_size):
loss_sum = 0.0
@@ -399,7 +401,6 @@ def fit(
for x, y in iter(dataloader):
optimizer.zero_grad()
pred = copied_model._model(x)
-
loss = loss_fn(pred, y)
loss_sum += loss.item()
amount_of_loss_values_calculated += 1
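Putting the new pieces together, a hedged end-to-end sketch of a time-series forecaster. The data is a univariate series (time plus target, no extra feature columns), so the conversion's data size is 1 * window_size; all names and hyperparameters are illustrative, not part of this diff:

    from safeds.data.labeled.containers import TimeSeriesDataset
    from safeds.ml.nn import (
        InputConversionTimeSeries,
        LSTMLayer,
        NeuralNetworkRegressor,
        OutputConversionTimeSeries,
    )

    train = TimeSeriesDataset(
        {"time": list(range(10)), "value": [float(i) for i in range(10)]},
        target_name="value",
        time_name="time",
    )
    model = NeuralNetworkRegressor(
        InputConversionTimeSeries(window_size=3, forecast_horizon=1),
        [LSTMLayer(input_size=3, output_size=1)],       # (0 features + 1) * window_size = 3
        OutputConversionTimeSeries("forecast"),
    )
    fitted = model.fit(train, epoch_size=2, batch_size=1)
    prediction = fitted.predict(train)                  # TimeSeriesDataset with target "forecast"

Note that fit now validates the training data against the input conversion before the size checks, so a dataset with unexpected columns fails with FeatureDataMismatchError right away.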
diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py
index 301413823..f29867e31 100644
--- a/src/safeds/ml/nn/_output_conversion.py
+++ b/src/safeds/ml/nn/_output_conversion.py
@@ -9,10 +9,11 @@
if TYPE_CHECKING:
from torch import Tensor
-from safeds.data.tabular.containers import Table, TimeSeries
+from safeds.data.labeled.containers import TimeSeriesDataset
+from safeds.data.tabular.containers import Table
-IT = TypeVar("IT", Table, TimeSeries, ImageList)
-OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset)
+IT = TypeVar("IT", Table, TimeSeriesDataset, ImageList)
+OT = TypeVar("OT", TabularDataset, TimeSeriesDataset, ImageDataset)
class OutputConversion(Generic[IT, OT], ABC):
diff --git a/src/safeds/ml/nn/_output_conversion_table.py b/src/safeds/ml/nn/_output_conversion_table.py
index a77b9862f..4146aaef1 100644
--- a/src/safeds/ml/nn/_output_conversion_table.py
+++ b/src/safeds/ml/nn/_output_conversion_table.py
@@ -25,6 +25,6 @@ def __init__(self, prediction_name: str = "prediction") -> None:
self._prediction_name = prediction_name
def _data_conversion(self, input_data: Table, output_data: Tensor, **kwargs: Any) -> TabularDataset: # noqa: ARG002
- return input_data.add_column(Column(self._prediction_name, output_data.tolist())).to_tabular_dataset(
+ return input_data.add_columns([Column(self._prediction_name, output_data.tolist())]).to_tabular_dataset(
self._prediction_name,
)
diff --git a/src/safeds/ml/nn/_output_conversion_time_series.py b/src/safeds/ml/nn/_output_conversion_time_series.py
new file mode 100644
index 000000000..f3ad6d43f
--- /dev/null
+++ b/src/safeds/ml/nn/_output_conversion_time_series.py
@@ -0,0 +1,89 @@
+from __future__ import annotations
+
+import sys
+from typing import TYPE_CHECKING, Any
+
+from safeds._utils import _structural_hash
+
+if TYPE_CHECKING:
+ from torch import Tensor
+from safeds.data.labeled.containers import TimeSeriesDataset
+from safeds.data.tabular.containers import Column, Table
+from safeds.ml.nn._output_conversion import OutputConversion
+
+
+class OutputConversionTimeSeries(OutputConversion[TimeSeriesDataset, TimeSeriesDataset]):
+ """The output conversion for a neural network, defines the output parameters for the neural network."""
+
+ def __hash__(self) -> int:
+ """
+ Return a deterministic hash value for this OutputConversionTimeSeries instance.
+
+ Returns
+ -------
+ hash:
+ the hash value
+ """
+ return _structural_hash(self.__class__.__name__ + self._prediction_name)
+
+ def __eq__(self, other: object) -> bool:
+ """
+ Compare two OutputConversionTimeSeries instances.
+
+ Parameters
+ ----------
+ other:
+ The OutputConversionTimeSeries instance to compare to.
+
+ Returns
+ -------
+ equals:
+ Whether the instances are the same.
+ """
+ if not isinstance(other, OutputConversionTimeSeries):
+ return False
+ return self._prediction_name == other._prediction_name
+
+ def __sizeof__(self) -> int:
+ """
+ Return the complete size of this object.
+
+ Returns
+ -------
+ size:
+ Size of this object in bytes.
+ """
+ return sys.getsizeof(self._prediction_name)
+
+ def __init__(self, prediction_name: str = "prediction_nn") -> None:
+ """
+ Define the output parameters for the neural network in the output conversion.
+
+ Parameters
+ ----------
+ prediction_name:
+ The name of the new column where the prediction will be stored.
+ """
+ self._prediction_name = prediction_name
+
+ def _data_conversion(self, input_data: TimeSeriesDataset, output_data: Tensor, **kwargs: Any) -> TimeSeriesDataset:
+ if "window_size" not in kwargs or not isinstance(kwargs.get("window_size"), int):
+ raise ValueError(
+ "The window_size is not set. The data can only be converted if the window_size is provided as `int` in the kwargs.",
+ )
+ if "forecast_horizon" not in kwargs or not isinstance(kwargs.get("forecast_horizon"), int):
+ raise ValueError(
+ "The forecast_horizon is not set. The data can only be converted if the forecast_horizon is provided as `int` in the kwargs.",
+ )
+ window_size: int = kwargs["window_size"]
+ forecast_horizon: int = kwargs["forecast_horizon"]
+ input_data_table = input_data.to_table()
+ input_data_table = Table.from_rows(input_data_table.to_rows()[window_size + forecast_horizon :])
+
+ return input_data_table.add_columns(
+ [Column(self._prediction_name, output_data.tolist())]
+ ).to_time_series_dataset(
+ target_name=self._prediction_name,
+ time_name=input_data.time.name,
+ extra_names=input_data.extras.column_names,
+ )
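One consequence of the slicing above: the prediction column attached by _data_conversion must contain exactly number_of_rows - window_size - forecast_horizon values, since that many rows remain after the offset is dropped. A tiny worked check (a sketch, not part of the test suite):

    # 10 input rows, window_size=3, forecast_horizon=1
    rows, window_size, forecast_horizon = 10, 3, 1
    remaining = rows - (window_size + forecast_horizon)
    assert remaining == 6  # the returned TimeSeriesDataset keeps the last 6 rows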
diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py
index 8e0d85676..8dbe9ac54 100644
--- a/tests/helpers/__init__.py
+++ b/tests/helpers/__init__.py
@@ -1,7 +1,6 @@
from ._assertions import (
assert_that_tables_are_close,
assert_that_tabular_datasets_are_equal,
- assert_that_time_series_are_equal,
)
from ._devices import (
device_cpu,
@@ -39,7 +38,6 @@
__all__ = [
"assert_that_tables_are_close",
"assert_that_tabular_datasets_are_equal",
- "assert_that_time_series_are_equal",
"device_cpu",
"device_cuda",
"grayscale_jpg_id",
diff --git a/tests/helpers/_assertions.py b/tests/helpers/_assertions.py
index 7bab1bdd5..e595fd9e2 100644
--- a/tests/helpers/_assertions.py
+++ b/tests/helpers/_assertions.py
@@ -1,6 +1,6 @@
import pytest
from safeds.data.labeled.containers import TabularDataset
-from safeds.data.tabular.containers import Table, TimeSeries
+from safeds.data.tabular.containers import Table
def assert_that_tables_are_close(table1: Table, table2: Table) -> None:
@@ -40,22 +40,3 @@ def assert_that_tabular_datasets_are_equal(table1: TabularDataset, table2: Tabul
assert table1.features == table2.features
assert table1.target == table2.target
assert table1 == table2
-
-
-def assert_that_time_series_are_equal(table1: TimeSeries, table2: TimeSeries) -> None:
- """
- Assert that two time series are equal.
-
- Parameters
- ----------
- table1: TimeSeries
- The first timeseries.
- table2: TimeSeries
- The timeseries to compare the first timeseries to.
- """
- assert table1.schema == table2.schema
- assert table1._feature_names == table2._feature_names
- assert table1.features == table2.features
- assert table1.target == table2.target
- assert table1.time == table2.time
- assert table1 == table2
diff --git a/tests/safeds/data/tabular/containers/_time_series/__init__.py b/tests/safeds/data/labeled/containers/_time_series_dataset/__init__.py
similarity index 100%
rename from tests/safeds/data/tabular/containers/_time_series/__init__.py
rename to tests/safeds/data/labeled/containers/_time_series_dataset/__init__.py
diff --git a/tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_compare_time_series/test_legit_compare.png b/tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_compare_time_series/test_legit_compare.png
similarity index 100%
rename from tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_compare_time_series/test_legit_compare.png
rename to tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_compare_time_series/test_legit_compare.png
diff --git a/tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_lag/test_should_return_table.png b/tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_lag/test_should_return_table.png
similarity index 100%
rename from tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_lag/test_should_return_table.png
rename to tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_lag/test_should_return_table.png
diff --git a/tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_lineplot/test_should_plot_feature.png b/tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_lineplot/test_should_plot_feature.png
similarity index 100%
rename from tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_lineplot/test_should_plot_feature.png
rename to tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_lineplot/test_should_plot_feature.png
diff --git a/tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_lineplot/test_should_plot_feature_x.png b/tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_lineplot/test_should_plot_feature_x.png
similarity index 100%
rename from tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_lineplot/test_should_plot_feature_x.png
rename to tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_lineplot/test_should_plot_feature_x.png
diff --git a/tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_lineplot/test_should_plot_feature_y.png b/tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_lineplot/test_should_plot_feature_y.png
similarity index 100%
rename from tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_lineplot/test_should_plot_feature_y.png
rename to tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_lineplot/test_should_plot_feature_y.png
diff --git a/tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_lineplot/test_should_return_table.png b/tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_lineplot/test_should_return_table.png
new file mode 100644
index 000000000..6b5c1ae22
Binary files /dev/null and b/tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_lineplot/test_should_return_table.png differ
diff --git a/tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_lineplot/test_should_return_table_both.png b/tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_lineplot/test_should_return_table_both.png
similarity index 100%
rename from tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_lineplot/test_should_return_table_both.png
rename to tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_lineplot/test_should_return_table_both.png
diff --git a/tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_scatterplot/test_should_plot_feature.png b/tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_scatterplot/test_should_plot_feature.png
similarity index 100%
rename from tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_scatterplot/test_should_plot_feature.png
rename to tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_scatterplot/test_should_plot_feature.png
diff --git a/tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_scatterplot/test_should_plot_feature_both_set.png b/tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_scatterplot/test_should_plot_feature_both_set.png
similarity index 100%
rename from tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_scatterplot/test_should_plot_feature_both_set.png
rename to tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_scatterplot/test_should_plot_feature_both_set.png
diff --git a/tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_scatterplot/test_should_plot_feature_only_x.png b/tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_scatterplot/test_should_plot_feature_only_x.png
similarity index 100%
rename from tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_scatterplot/test_should_plot_feature_only_x.png
rename to tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_scatterplot/test_should_plot_feature_only_x.png
diff --git a/tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_scatterplot/test_should_plot_feature_only_y_optional.png b/tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_scatterplot/test_should_plot_feature_only_y_optional.png
similarity index 100%
rename from tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_scatterplot/test_should_plot_feature_only_y_optional.png
rename to tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_scatterplot/test_should_plot_feature_only_y_optional.png
diff --git a/tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_scatterplot/test_should_return_table.png b/tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_scatterplot/test_should_return_table.png
similarity index 100%
rename from tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_scatterplot/test_should_return_table.png
rename to tests/safeds/data/labeled/containers/_time_series_dataset/__snapshots__/test_plot_scatterplot/test_should_return_table.png
diff --git a/tests/safeds/data/labeled/containers/_time_series_dataset/test_eq.py b/tests/safeds/data/labeled/containers/_time_series_dataset/test_eq.py
new file mode 100644
index 000000000..7743da63a
--- /dev/null
+++ b/tests/safeds/data/labeled/containers/_time_series_dataset/test_eq.py
@@ -0,0 +1,79 @@
+from typing import Any
+
+import pytest
+from safeds.data.labeled.containers import TimeSeriesDataset
+from safeds.data.tabular.containers import Row, Table
+
+
+@pytest.mark.parametrize(
+ ("table1", "table2", "expected"),
+ [
+ (
+ TimeSeriesDataset({"a": [], "b": [], "c": []}, "b", "c"),
+ TimeSeriesDataset({"a": [], "b": [], "c": []}, "b", "c"),
+ True,
+ ),
+ (
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [4, 5, 6]}, "b", "c"),
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [4, 5, 6]}, "b", "c"),
+ True,
+ ),
+ (
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "a", ["c"]),
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "c", "a", ["b"]),
+ False,
+ ),
+ (
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "a", ["c"]),
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "d": [7, 8, 9]}, "b", "a", ["d"]),
+ False,
+ ),
+ (
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", "a"),
+ TimeSeriesDataset({"a": [1, 1, 3], "b": [4, 5, 6]}, "b", "a"),
+ False,
+ ),
+ (
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", "a"),
+ TimeSeriesDataset({"a": ["1", "2", "3"], "b": [4, 5, 6]}, "b", "a"),
+ False,
+ ),
+ (
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "a", ["c"]),
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]),
+ False,
+ ),
+ ],
+ ids=[
+ "rowless table",
+ "equal tables",
+ "different target",
+ "different column names",
+ "different values",
+ "different types",
+ "different features",
+ ],
+)
+def test_should_return_whether_two_tabular_datasets_are_equal(
+ table1: TimeSeriesDataset,
+ table2: TimeSeriesDataset,
+ expected: bool,
+) -> None:
+ assert (table1.__eq__(table2)) == expected
+
+
+@pytest.mark.parametrize(
+ ("table", "other"),
+ [
+ (TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0, 0, 0]}, "b", "c"), None),
+ (TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0, 0, 0]}, "b", "c"), Row()),
+ (TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0, 0, 0]}, "b", "c"), Table()),
+ ],
+ ids=[
+ "TabularDataset vs. None",
+ "TabularDataset vs. Row",
+ "TabularDataset vs. Table",
+ ],
+)
+def test_should_return_not_implemented_if_other_is_not_tabular_dataset(table: TimeSeriesDataset, other: Any) -> None:
+ assert (table.__eq__(other)) is NotImplemented
diff --git a/tests/safeds/data/labeled/containers/_time_series_dataset/test_extras.py b/tests/safeds/data/labeled/containers/_time_series_dataset/test_extras.py
new file mode 100644
index 000000000..bd93075d6
--- /dev/null
+++ b/tests/safeds/data/labeled/containers/_time_series_dataset/test_extras.py
@@ -0,0 +1,43 @@
+import pytest
+from safeds.data.labeled.containers import TimeSeriesDataset
+from safeds.data.tabular.containers import Table
+
+
+@pytest.mark.parametrize(
+ ("tabular_dataset", "extras"),
+ [
+ (
+ TimeSeriesDataset(
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ },
+ target_name="T",
+ time_name="C",
+ ),
+ Table(),
+ ),
+ (
+ TimeSeriesDataset(
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ },
+ target_name="T",
+ time_name="B",
+ extra_names=["A", "C"],
+ ),
+ Table({"A": [1, 4], "C": [3, 6]}),
+ ),
+ ],
+ ids=[
+ "only_target_and_features",
+ "target_features_and_extras",
+ ],
+)
+def test_should_return_extras(tabular_dataset: TimeSeriesDataset, extras: Table) -> None:
+ assert tabular_dataset.extras == extras
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_features.py b/tests/safeds/data/labeled/containers/_time_series_dataset/test_features.py
similarity index 56%
rename from tests/safeds/data/tabular/containers/_time_series/test_features.py
rename to tests/safeds/data/labeled/containers/_time_series_dataset/test_features.py
index 5b75cb317..dcc55c06c 100644
--- a/tests/safeds/data/tabular/containers/_time_series/test_features.py
+++ b/tests/safeds/data/labeled/containers/_time_series_dataset/test_features.py
@@ -1,42 +1,41 @@
import pytest
-from safeds.data.tabular.containers import Table, TimeSeries
+from safeds.data.labeled.containers import TimeSeriesDataset
+from safeds.data.tabular.containers import Table
@pytest.mark.parametrize(
- ("time_series", "features"),
+ ("tabular_dataset", "features"),
[
(
- TimeSeries(
+ TimeSeriesDataset(
{
- "time": [0, 1],
"A": [1, 4],
"B": [2, 5],
"C": [3, 6],
"T": [0, 1],
},
target_name="T",
- time_name="time",
- feature_names=["A", "B", "C"],
+ time_name="C",
),
- Table({"A": [1, 4], "B": [2, 5], "C": [3, 6]}),
+ Table({"A": [1, 4], "B": [2, 5]}),
),
(
- TimeSeries(
+ TimeSeriesDataset(
{
- "time": [0, 1],
"A": [1, 4],
"B": [2, 5],
"C": [3, 6],
"T": [0, 1],
+ "time": [0, 0],
},
target_name="T",
time_name="time",
- feature_names=["A", "C"],
+ extra_names=["B"],
),
Table({"A": [1, 4], "C": [3, 6]}),
),
],
ids=["only_target_and_features", "target_features_and_other"],
)
-def test_should_return_features(time_series: TimeSeries, features: Table) -> None:
- assert time_series.features == features
+def test_should_return_features(tabular_dataset: TimeSeriesDataset, features: Table) -> None:
+ assert tabular_dataset.features == features
diff --git a/tests/safeds/data/labeled/containers/_time_series_dataset/test_hash.py b/tests/safeds/data/labeled/containers/_time_series_dataset/test_hash.py
new file mode 100644
index 000000000..5df6d0170
--- /dev/null
+++ b/tests/safeds/data/labeled/containers/_time_series_dataset/test_hash.py
@@ -0,0 +1,65 @@
+import pytest
+from safeds.data.labeled.containers import TimeSeriesDataset
+
+
+@pytest.mark.parametrize(
+ ("table1", "table2"),
+ [
+ (
+ TimeSeriesDataset({"a": [], "b": []}, "b", "a"),
+ TimeSeriesDataset({"a": [], "b": []}, "b", "a"),
+ ),
+ (
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", "a"),
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", "a"),
+ ),
+ (
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", "a"),
+ TimeSeriesDataset({"a": [1, 1, 3], "b": [4, 5, 6]}, "b", "a"),
+ ),
+ ],
+ ids=[
+ "rowless table",
+ "equal tables",
+ "different values",
+ ],
+)
+def test_should_return_same_hash_for_equal_tabular_datasets(
+ table1: TimeSeriesDataset,
+ table2: TimeSeriesDataset,
+) -> None:
+ assert hash(table1) == hash(table2)
+
+
+@pytest.mark.parametrize(
+ ("table1", "table2"),
+ [
+ (
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "a", ["c"]),
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "c", "a", ["b"]),
+ ),
+ (
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "a", ["c"]),
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "d": [7, 8, 9]}, "b", "a", ["d"]),
+ ),
+ (
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", "a"),
+ TimeSeriesDataset({"a": ["1", "2", "3"], "b": [4, 5, 6]}, "b", "a"),
+ ),
+ (
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "a", ["c"]),
+ TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]),
+ ),
+ ],
+ ids=[
+ "different target",
+ "different column names",
+ "different types",
+ "different features",
+ ],
+)
+def test_should_return_different_hash_for_unequal_tabular_datasets(
+ table1: TimeSeriesDataset,
+ table2: TimeSeriesDataset,
+) -> None:
+ assert hash(table1) != hash(table2)
diff --git a/tests/safeds/data/labeled/containers/_time_series_dataset/test_init.py b/tests/safeds/data/labeled/containers/_time_series_dataset/test_init.py
new file mode 100644
index 000000000..99719be02
--- /dev/null
+++ b/tests/safeds/data/labeled/containers/_time_series_dataset/test_init.py
@@ -0,0 +1,245 @@
+import pytest
+from safeds.data.labeled.containers import TimeSeriesDataset
+from safeds.data.tabular.containers import Table
+from safeds.exceptions import UnknownColumnNameError
+
+
+@pytest.mark.parametrize(
+ ("data", "target_name", "time_name", "extra_names", "error", "error_msg"),
+ [
+ (
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ "time": [9, 9],
+ },
+ "T",
+ "time",
+ ["D", "E"],
+ UnknownColumnNameError,
+ r"Could not find column\(s\) 'D, E'",
+ ),
+ (
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ "time": [9, 9],
+ },
+ "D",
+ "time",
+ [],
+ UnknownColumnNameError,
+ r"Could not find column\(s\) 'D'",
+ ),
+ (
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ "time": [9, 9],
+ },
+ "A",
+ "time",
+ ["A"],
+ ValueError,
+ r"Column 'A' cannot be both target and extra.",
+ ),
+ (
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ "time": [9, 9],
+ },
+ "T",
+ "time",
+ ["A", "time", "C"],
+ ValueError,
+ r"Column 'time' cannot be both time and extra.",
+ ),
+ (
+ Table(
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ "time": [9, 9],
+ },
+ ),
+ "T",
+ "time",
+ ["D", "E"],
+ UnknownColumnNameError,
+ r"Could not find column\(s\) 'D, E'",
+ ),
+ (
+ Table(
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ "time": [9, 9],
+ },
+ ),
+ "D",
+ "time",
+ [],
+ UnknownColumnNameError,
+ r"Could not find column\(s\) 'D'",
+ ),
+ (
+ Table(
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ "time": [9, 9],
+ },
+ ),
+ "A",
+ "time",
+ ["A"],
+ ValueError,
+ r"Column 'A' cannot be both target and extra.",
+ ),
+ ],
+ ids=[
+ "dict_extra_does_not_exist",
+ "dict_target_does_not_exist",
+ "dict_target_and_extra_overlap",
+ "dict_features_are_empty_explicitly",
+ "table_extra_does_not_exist",
+ "table_target_does_not_exist",
+ "table_target_and_extra_overlap",
+ ],
+)
+def test_should_raise_error(
+ data: dict[str, list[int]],
+ target_name: str,
+ time_name: str,
+ extra_names: list[str] | None,
+ error: type[Exception],
+ error_msg: str,
+) -> None:
+ with pytest.raises(error, match=error_msg):
+ TimeSeriesDataset(data, target_name=target_name, time_name=time_name, extra_names=extra_names)
+
+
+@pytest.mark.parametrize(
+ ("data", "target_name", "time_name", "extra_names"),
+ [
+ (
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ "time": [9, 9],
+ },
+ "T",
+ "time",
+ [],
+ ),
+ (
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ "time": [9, 9],
+ },
+ "T",
+ "time",
+ ["A", "C"],
+ ),
+ (
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ "time": [9, 9],
+ },
+ "T",
+ "time",
+ None,
+ ),
+ (
+ Table(
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ "time": [9, 9],
+ },
+ ),
+ "T",
+ "time",
+ [],
+ ),
+ (
+ Table(
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ "time": [9, 9],
+ },
+ ),
+ "T",
+ "time",
+ ["A", "C"],
+ ),
+ (
+ Table(
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ "time": [9, 9],
+ },
+ ),
+ "T",
+ "time",
+ None,
+ ),
+ ],
+ ids=[
+ "dict_create_tabular_dataset",
+ "dict_tabular_dataset_not_all_columns_are_features",
+ "dict_tabular_dataset_with_extra_names_as_None",
+ "table_create_tabular_dataset",
+ "table_tabular_dataset_not_all_columns_are_features",
+ "table_tabular_dataset_with_extra_names_as_None",
+ ],
+)
+def test_should_create_a_tabular_dataset(
+ data: Table | dict[str, list[int]],
+ target_name: str,
+ time_name: str,
+ extra_names: list[str] | None,
+) -> None:
+ tabular_dataset = TimeSeriesDataset(data, target_name=target_name, time_name=time_name, extra_names=extra_names)
+ if not isinstance(data, Table):
+ data = Table(data)
+
+ if extra_names is None:
+ extra_names = []
+
+ assert isinstance(tabular_dataset, TimeSeriesDataset)
+ assert tabular_dataset._extras.column_names == extra_names
+ assert tabular_dataset._target.name == target_name
+ assert tabular_dataset._extras == data.keep_only_columns(extra_names)
+ assert tabular_dataset._target == data.get_column(target_name)
diff --git a/tests/safeds/data/labeled/containers/_time_series_dataset/test_into_dataloader.py b/tests/safeds/data/labeled/containers/_time_series_dataset/test_into_dataloader.py
new file mode 100644
index 000000000..8a9cbb393
--- /dev/null
+++ b/tests/safeds/data/labeled/containers/_time_series_dataset/test_into_dataloader.py
@@ -0,0 +1,97 @@
+import pytest
+from safeds.data.tabular.containers import Table
+from safeds.data.labeled.containers import TimeSeriesDataset
+from torch.utils.data import DataLoader
+
+
+@pytest.mark.parametrize(
+ ("data", "target_name", "time_name", "extra_names"),
+ [
+ (
+ {
+ "A": [1, 4, 3],
+ "B": [2, 5, 4],
+ "C": [3, 6, 5],
+ "T": [0, 1, 6],
+ },
+ "T",
+ "B",
+ [],
+ ),
+ ],
+ ids=[
+ "test",
+ ],
+)
+def test_should_create_dataloader(
+ data: dict[str, list[int]],
+ target_name: str,
+ time_name: str,
+ extra_names: list[str] | None,
+) -> None:
+ tabular_dataset = Table.from_dict(data).to_time_series_dataset(target_name, time_name, extra_names)
+ data_loader = tabular_dataset._into_dataloader_with_window(1, 1, 1)
+ assert isinstance(data_loader, DataLoader)
+
+
+@pytest.mark.parametrize(
+ ("data", "window_size", "forecast_horizon", "error_type", "error_msg"),
+ [
+ (
+ Table(
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ }
+ ).to_time_series_dataset("T", "B"),
+ 1,
+ 2,
+ ValueError,
+ r"Can not create windows with window size less then forecast horizon \+ window_size",
+ ),
+ (
+ Table(
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ }
+ ).to_time_series_dataset("T", "B"),
+ 1,
+ 0,
+ ValueError,
+ r"forecast_horizon must be greater than or equal to 1",
+ ),
+ (
+ Table(
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ }
+ ).to_time_series_dataset("T", "B"),
+ 0,
+ 1,
+ ValueError,
+ r"window_size must be greater than or equal to 1",
+ ),
+ ],
+ ids=[
+ "forecast_and_window",
+ "forecast",
+ "window_size",
+ ],
+)
+def test_should_create_dataloader_invalid(
+ data: TimeSeriesDataset,
+ window_size: int,
+ forecast_horizon: int,
+ error_type: type[ValueError],
+ error_msg: str,
+) -> None:
+ with pytest.raises(error_type, match=error_msg):
+ data._into_dataloader_with_window(window_size=window_size, forecast_horizon=forecast_horizon, batch_size=1)
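In short, the validation exercised here (per the error messages above) requires window_size >= 1, forecast_horizon >= 1, and at least window_size + forecast_horizon rows. A quick sketch under those assumptions:

    from safeds.data.tabular.containers import Table

    dataset = Table({"value": [1, 2, 3, 4, 5], "time": [1, 2, 3, 4, 5]}).to_time_series_dataset("value", "time")
    loader = dataset._into_dataloader_with_window(window_size=2, forecast_horizon=1, batch_size=1)  # 5 rows >= 2 + 1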
diff --git a/tests/safeds/data/labeled/containers/_time_series_dataset/test_repr_html.py b/tests/safeds/data/labeled/containers/_time_series_dataset/test_repr_html.py
new file mode 100644
index 000000000..e1c40de42
--- /dev/null
+++ b/tests/safeds/data/labeled/containers/_time_series_dataset/test_repr_html.py
@@ -0,0 +1,47 @@
+import re
+
+import pytest
+from safeds.data.labeled.containers import TimeSeriesDataset
+
+
+@pytest.mark.parametrize(
+ "tabular_dataset",
+ [
+ TimeSeriesDataset({"a": [1, 2], "b": [3, 4]}, target_name="b", time_name="a"),
+ ],
+ ids=[
+ "non-empty",
+ ],
+)
+def test_should_contain_tabular_dataset_element(tabular_dataset: TimeSeriesDataset) -> None:
+ pattern = r"
.*?"
+ assert re.search(pattern, tabular_dataset._repr_html_(), flags=re.S) is not None
+
+
+@pytest.mark.parametrize(
+ "tabular_dataset",
+ [
+ TimeSeriesDataset({"a": [1, 2], "b": [3, 4]}, target_name="b", time_name="a"),
+ ],
+ ids=[
+ "non-empty",
+ ],
+)
+def test_should_contain_th_element_for_each_column_name(tabular_dataset: TimeSeriesDataset) -> None:
+ for column_name in tabular_dataset._table.column_names:
+ assert f"{column_name} | " in tabular_dataset._repr_html_()
+
+
+@pytest.mark.parametrize(
+ "tabular_dataset",
+ [
+ TimeSeriesDataset({"a": [1, 2], "b": [3, 4]}, target_name="b", time_name="a"),
+ ],
+ ids=[
+ "non-empty",
+ ],
+)
+def test_should_contain_td_element_for_each_value(tabular_dataset: TimeSeriesDataset) -> None:
+ for column in tabular_dataset._table.to_columns():
+ for value in column:
+ assert f"{value} | " in tabular_dataset._repr_html_()
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_sizeof.py b/tests/safeds/data/labeled/containers/_time_series_dataset/test_sizeof.py
similarity index 51%
rename from tests/safeds/data/tabular/containers/_time_series/test_sizeof.py
rename to tests/safeds/data/labeled/containers/_time_series_dataset/test_sizeof.py
index 1a0ded04b..461f27a79 100644
--- a/tests/safeds/data/tabular/containers/_time_series/test_sizeof.py
+++ b/tests/safeds/data/labeled/containers/_time_series_dataset/test_sizeof.py
@@ -1,37 +1,36 @@
import sys
import pytest
-from safeds.data.tabular.containers import TimeSeries
+from safeds.data.labeled.containers import TimeSeriesDataset
@pytest.mark.parametrize(
- "time_series",
+ "tabular_dataset",
[
- TimeSeries(
+ TimeSeriesDataset(
{
- "time": [0, 1, 2],
"feature_1": [3, 9, 6],
"feature_2": [6, 12, 9],
"target": [1, 3, 2],
+ "time": [1, 2, 3],
},
"target",
"time",
- ["feature_1", "feature_2"],
),
- TimeSeries(
+ TimeSeriesDataset(
{
- "time": [0, 1, 2],
"feature_1": [3, 9, 6],
"feature_2": [6, 12, 9],
"other": [3, 9, 12],
"target": [1, 3, 2],
+ "time": [1, 2, 3],
},
"target",
"time",
- ["feature_1", "feature_2"],
+ ["other"],
),
],
- ids=["normal", "table_with_column_as_non_feature"],
+ ids=["normal", "table_with_extra_column"],
)
-def test_should_size_be_greater_than_normal_object(time_series: TimeSeries) -> None:
- assert sys.getsizeof(time_series) > sys.getsizeof(object())
+def test_should_size_be_greater_than_normal_object(tabular_dataset: TimeSeriesDataset) -> None:
+ assert sys.getsizeof(tabular_dataset) > sys.getsizeof(object())
diff --git a/tests/safeds/data/labeled/containers/_time_series_dataset/test_target.py b/tests/safeds/data/labeled/containers/_time_series_dataset/test_target.py
new file mode 100644
index 000000000..d4c189f71
--- /dev/null
+++ b/tests/safeds/data/labeled/containers/_time_series_dataset/test_target.py
@@ -0,0 +1,26 @@
+import pytest
+from safeds.data.labeled.containers import TimeSeriesDataset
+from safeds.data.tabular.containers import Column
+
+
+@pytest.mark.parametrize(
+ ("tabular_dataset", "target_column"),
+ [
+ (
+ TimeSeriesDataset(
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ },
+ target_name="T",
+ time_name="A",
+ ),
+ Column("T", [0, 1]),
+ ),
+ ],
+ ids=["target"],
+)
+def test_should_return_target(tabular_dataset: TimeSeriesDataset, target_column: Column) -> None:
+ assert tabular_dataset.target == target_column
diff --git a/tests/safeds/data/labeled/containers/_time_series_dataset/test_time.py b/tests/safeds/data/labeled/containers/_time_series_dataset/test_time.py
new file mode 100644
index 000000000..57f1655e9
--- /dev/null
+++ b/tests/safeds/data/labeled/containers/_time_series_dataset/test_time.py
@@ -0,0 +1,26 @@
+import pytest
+from safeds.data.labeled.containers import TimeSeriesDataset
+from safeds.data.tabular.containers import Column
+
+
+@pytest.mark.parametrize(
+ ("tabular_dataset", "time_column"),
+ [
+ (
+ TimeSeriesDataset(
+ {
+ "A": [1, 4],
+ "B": [2, 5],
+ "C": [3, 6],
+ "T": [0, 1],
+ },
+ target_name="T",
+ time_name="A",
+ ),
+ Column("A", [1, 4]),
+ ),
+ ],
+ ids=["time"],
+)
+def test_should_return_time(tabular_dataset: TimeSeriesDataset, time_column: Column) -> None:
+ assert tabular_dataset.time == time_column
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_as_table.py b/tests/safeds/data/labeled/containers/_time_series_dataset/test_to_table.py
similarity index 63%
rename from tests/safeds/data/tabular/containers/_time_series/test_as_table.py
rename to tests/safeds/data/labeled/containers/_time_series_dataset/test_to_table.py
index 443e6f7cf..acdc10da3 100644
--- a/tests/safeds/data/tabular/containers/_time_series/test_as_table.py
+++ b/tests/safeds/data/labeled/containers/_time_series_dataset/test_to_table.py
@@ -1,25 +1,23 @@
import pytest
-from safeds.data.tabular.containers import Table, TimeSeries
+from safeds.data.labeled.containers import TimeSeriesDataset
+from safeds.data.tabular.containers import Table
@pytest.mark.parametrize(
- ("time_series", "expected"),
+ ("tabular_dataset", "expected"),
[
(
- TimeSeries(
+ TimeSeriesDataset(
{
- "time": [0, 1, 2],
"feature_1": [3, 9, 6],
"feature_2": [6, 12, 9],
"target": [1, 3, 2],
},
"target",
- "time",
- ["feature_1", "feature_2"],
+ "feature_1",
),
Table(
{
- "time": [0, 1, 2],
"feature_1": [3, 9, 6],
"feature_2": [6, 12, 9],
"target": [1, 3, 2],
@@ -27,21 +25,19 @@
),
),
(
- TimeSeries(
+ TimeSeriesDataset(
{
- "time": [0, 1, 2],
"feature_1": [3, 9, 6],
"feature_2": [6, 12, 9],
"other": [3, 9, 12],
"target": [1, 3, 2],
},
"target",
- "time",
- ["feature_1", "feature_2"],
+ "feature_1",
+ ["other"],
),
Table(
{
- "time": [0, 1, 2],
"feature_1": [3, 9, 6],
"feature_2": [6, 12, 9],
"other": [3, 9, 12],
@@ -50,9 +46,9 @@
),
),
],
- ids=["normal", "table_with_column_as_non_feature"],
+ ids=["normal", "table_with_extra_column"],
)
-def test_should_return_table(time_series: TimeSeries, expected: Table) -> None:
- table = time_series._as_table()
+def test_should_return_table(tabular_dataset: TimeSeriesDataset, expected: Table) -> None:
+ table = tabular_dataset.to_table()
assert table.schema == expected.schema
assert table == expected
diff --git a/tests/safeds/data/tabular/containers/_column/__snapshots__/test_plot_compare_columns/test_legit_compare.png b/tests/safeds/data/tabular/containers/_column/__snapshots__/test_plot_compare_columns/test_legit_compare.png
new file mode 100644
index 000000000..a9601890b
Binary files /dev/null and b/tests/safeds/data/tabular/containers/_column/__snapshots__/test_plot_compare_columns/test_legit_compare.png differ
diff --git a/tests/safeds/data/tabular/containers/_column/__snapshots__/test_plot_lag/test_should_return_table.png b/tests/safeds/data/tabular/containers/_column/__snapshots__/test_plot_lag/test_should_return_table.png
new file mode 100644
index 000000000..0f17b4726
Binary files /dev/null and b/tests/safeds/data/tabular/containers/_column/__snapshots__/test_plot_lag/test_should_return_table.png differ
diff --git a/tests/safeds/data/tabular/containers/_column/test_plot_compare_columns.py b/tests/safeds/data/tabular/containers/_column/test_plot_compare_columns.py
new file mode 100644
index 000000000..48820e2f3
--- /dev/null
+++ b/tests/safeds/data/tabular/containers/_column/test_plot_compare_columns.py
@@ -0,0 +1,83 @@
+import pytest
+from safeds.data.tabular.containers import Column
+from safeds.exceptions import NonNumericColumnError
+from syrupy import SnapshotAssertion
+
+
+def create_time_series_list() -> list[Column]:
+ table1 = Column(
+ "target",
+ [9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ )
+ table2 = Column("target", [4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+ return [table1, table2]
+
+
+def create_invalid_time_series_list() -> list[Column]:
+ table1 = Column("target", ["9", 10, 11, 12, 13, 14, 15, 16, 17, 18])
+ table2 = Column("target", ["4", 5, 6, 7, 8, 9, 10, 11, 12, 13])
+ return [table1, table2]
+
+
+def test_legit_compare(snapshot_png_image: SnapshotAssertion) -> None:
+ col = Column(
+ "target",
+ [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+ )
+ plot = col.plot_compare_columns(create_time_series_list())
+ assert plot == snapshot_png_image
+
+
+def test_should_raise_if_column_contains_non_numerical_values_x() -> None:
+ table = Column(
+ "target",
+ ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
+ )
+ with pytest.raises(
+ NonNumericColumnError,
+ match=(
+ r"Tried to do a numerical operation on one or multiple non-numerical columns: \nThe time series plotted"
+ r" column"
+ r" contains"
+ r" non-numerical columns."
+ ),
+ ):
+ table.plot_compare_columns(create_time_series_list())
+
+
+def test_with_non_valid_list() -> None:
+ table = Column(
+ "target",
+ [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+ )
+ with pytest.raises(
+ NonNumericColumnError,
+ match=(
+ r"Tried to do a numerical operation on one or multiple non-numerical columns: \nThe time series plotted"
+ r" column"
+ r" contains"
+ r" non-numerical columns."
+ ),
+ ):
+ table.plot_compare_columns(create_invalid_time_series_list())
+
+
+def test_with_non_valid_size() -> None:
+ table = Column(
+ "target",
+ [
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ ],
+ )
+ with pytest.raises(
+ ValueError,
+ match=(r"The columns must have the same size."),
+ ):
+ table.plot_compare_columns(create_time_series_list())
diff --git a/tests/safeds/data/tabular/containers/_column/test_plot_lag.py b/tests/safeds/data/tabular/containers/_column/test_plot_lag.py
new file mode 100644
index 000000000..e688b4ff7
--- /dev/null
+++ b/tests/safeds/data/tabular/containers/_column/test_plot_lag.py
@@ -0,0 +1,29 @@
+import pytest
+from safeds.data.tabular.containers import Column
+from safeds.exceptions import NonNumericColumnError
+from syrupy import SnapshotAssertion
+
+
+def test_should_return_table(snapshot_png_image: SnapshotAssertion) -> None:
+ col = Column(
+ "target",
+ [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+ )
+ lag_plot = col.plot_lagplot(1)
+ assert lag_plot == snapshot_png_image
+
+
+def test_should_raise_if_column_contains_non_numerical_values() -> None:
+    col = Column(
+ "target",
+ ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
+ )
+ with pytest.raises(
+ NonNumericColumnError,
+ match=(
+ r"Tried to do a numerical operation on one or multiple non-numerical columns: \nThis time series target"
+ r" contains"
+ r" non-numerical columns."
+ ),
+ ):
+        col.plot_lagplot(2)
diff --git a/tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_lineplot/test_should_return_table.png b/tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_lineplot/test_should_return_table.png
deleted file mode 100644
index f40ea7854..000000000
Binary files a/tests/safeds/data/tabular/containers/_time_series/__snapshots__/test_plot_lineplot/test_should_return_table.png and /dev/null differ
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_add_column.py b/tests/safeds/data/tabular/containers/_time_series/test_add_column.py
deleted file mode 100644
index 8cb4eb7ac..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_add_column.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import Column, TimeSeries
-
-from tests.helpers import assert_that_time_series_are_equal
-
-
-@pytest.mark.parametrize(
- ("time_series", "column", "expected_time_series"),
- [
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_1": [0, 1, 2],
- "target": [3, 4, 5],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- ),
- Column("other", [6, 7, 8]),
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_1": [0, 1, 2],
- "target": [3, 4, 5],
- "other": [6, 7, 8],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- ),
- ),
- ],
- ids=["add_column_as_non_feature"],
-)
-def test_should_add_column(time_series: TimeSeries, column: Column, expected_time_series: TimeSeries) -> None:
- assert_that_time_series_are_equal(time_series.add_column(column), expected_time_series)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_add_column_as_feature.py b/tests/safeds/data/tabular/containers/_time_series/test_add_column_as_feature.py
deleted file mode 100644
index 03f157c68..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_add_column_as_feature.py
+++ /dev/null
@@ -1,99 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import Column, Table, TimeSeries
-from safeds.exceptions import ColumnSizeError, DuplicateColumnNameError
-
-from tests.helpers import assert_that_time_series_are_equal
-
-
-@pytest.mark.parametrize(
- ("time_series", "column", "time_series_with_new_column"),
- [
- (
- Table({"t": [1, 2], "f1": [1, 2], "target": [2, 3]}).time_columns(
- target_name="target",
- time_name="t",
- feature_names=["f1"],
- ),
- Column("f2", [4, 5]),
- Table({"t": [1, 2], "f1": [1, 2], "target": [2, 3], "f2": [4, 5]}).time_columns(
- target_name="target",
- time_name="t",
- feature_names=["f1", "f2"],
- ),
- ),
- (
- Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1]}).time_columns(
- target_name="target",
- time_name="other",
- feature_names=["f1"],
- ),
- Column("f2", [4, 5]),
- Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1], "f2": [4, 5]}).time_columns(
- target_name="target",
- time_name="other",
- feature_names=["f1", "f2"],
- ),
- ),
- ],
- ids=["new column as feature", "table contains a non feature/target column"],
-)
-def test_should_add_column_as_feature(
- time_series: TimeSeries,
- column: Column,
- time_series_with_new_column: TimeSeries,
-) -> None:
- assert_that_time_series_are_equal(
- time_series.add_column_as_feature(column),
- time_series_with_new_column,
- )
-
-
-@pytest.mark.parametrize(
- ("time_series", "column", "error_msg"),
- [
- (
- TimeSeries(
- {"time": [0, 1, 2], "A": [1, 2, 3], "B": [4, 5, 6]},
- target_name="B",
- time_name="time",
- feature_names=["A"],
- ),
- Column("A", [7, 8, 9]),
- r"Column 'A' already exists.",
- ),
- ],
- ids=["column_already_exists"],
-)
-def test_should_raise_duplicate_column_name_if_column_already_exists(
- time_series: TimeSeries,
- column: Column,
- error_msg: str,
-) -> None:
- with pytest.raises(DuplicateColumnNameError, match=error_msg):
- time_series.add_column_as_feature(column)
-
-
-# here starts the second test for errors
-@pytest.mark.parametrize(
- ("time_series", "column", "error_msg"),
- [
- (
- TimeSeries(
- {"time": [0, 1, 2], "A": [1, 2, 3], "B": [4, 5, 6]},
- target_name="B",
- time_name="time",
- feature_names=["A"],
- ),
- Column("C", [5, 7, 8, 9]),
- r"Expected a column of size 3 but got column of size 4.",
- ),
- ],
- ids=["column_is_oversize"],
-)
-def test_should_raise_column_size_error_if_column_is_oversize(
- time_series: TimeSeries,
- column: Column,
- error_msg: str,
-) -> None:
- with pytest.raises(ColumnSizeError, match=error_msg):
- time_series.add_column_as_feature(column)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_add_columns.py b/tests/safeds/data/tabular/containers/_time_series/test_add_columns.py
deleted file mode 100644
index 3433e4d28..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_add_columns.py
+++ /dev/null
@@ -1,43 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import Column, TimeSeries
-
-from tests.helpers import assert_that_time_series_are_equal
-
-
-@pytest.mark.parametrize(
- ("time_series", "columns", "expected_time_series"),
- [
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_1": [0, 1, 2],
- "target": [3, 4, 5],
- },
- "target",
- "time",
- None,
- ),
- [Column("other", [6, 7, 8]), Column("other2", [9, 6, 3])],
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_1": [0, 1, 2],
- "target": [3, 4, 5],
- "other": [6, 7, 8],
- "other2": [9, 6, 3],
- },
- "target",
- "time",
- None,
- ),
- ),
- ],
- ids=["add_columns_as_non_feature"],
-)
-def test_should_add_columns(
- time_series: TimeSeries,
- columns: list[Column],
- expected_time_series: TimeSeries,
-) -> None:
- assert_that_time_series_are_equal(time_series.add_columns(columns), expected_time_series)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_add_columns_as_features.py b/tests/safeds/data/tabular/containers/_time_series/test_add_columns_as_features.py
deleted file mode 100644
index 4bbbacccf..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_add_columns_as_features.py
+++ /dev/null
@@ -1,115 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import Column, Table, TimeSeries
-from safeds.exceptions import ColumnSizeError, DuplicateColumnNameError
-
-from tests.helpers import assert_that_time_series_are_equal
-
-
-@pytest.mark.parametrize(
- ("time_series", "columns", "time_series_with_new_columns"),
- [
- (
- Table({"time": [0, 1], "f1": [1, 2], "target": [2, 3]}).time_columns(
- target_name="target",
- time_name="time",
- feature_names=["f1"],
- ),
- [Column("f2", [4, 5]), Column("f3", [6, 7])],
- Table({"time": [0, 1], "f1": [1, 2], "target": [2, 3], "f2": [4, 5], "f3": [6, 7]}).time_columns(
- target_name="target",
- time_name="time",
- feature_names=["f1", "f2", "f3"],
- ),
- ),
- (
- Table({"time": [0, 1], "f1": [1, 2], "target": [2, 3]}).time_columns(
- target_name="target",
- time_name="time",
- feature_names=["f1"],
- ),
- Table.from_columns([Column("f2", [4, 5]), Column("f3", [6, 7])]),
- Table({"time": [0, 1], "f1": [1, 2], "target": [2, 3], "f2": [4, 5], "f3": [6, 7]}).time_columns(
- target_name="target",
- time_name="time",
- feature_names=["f1", "f2", "f3"],
- ),
- ),
- (
- Table({"time": [0, 1], "f1": [1, 2], "target": [2, 3], "other": [0, -1]}).time_columns(
- target_name="target",
- time_name="time",
- feature_names=["f1"],
- ),
- Table.from_columns([Column("f2", [4, 5]), Column("f3", [6, 7])]),
- Table({
- "time": [0, 1],
- "f1": [1, 2],
- "target": [2, 3],
- "other": [0, -1],
- "f2": [4, 5],
- "f3": [6, 7],
- }).time_columns(
- target_name="target",
- time_name="time",
- feature_names=["f1", "f2", "f3"],
- ),
- ),
- ],
- ids=["new columns as feature", "table added as features", "table contains a non feature/target column"],
-)
-def test_add_columns_as_features(
- time_series: TimeSeries,
- columns: list[Column] | Table,
- time_series_with_new_columns: TimeSeries,
-) -> None:
- assert_that_time_series_are_equal(time_series.add_columns_as_features(columns), time_series_with_new_columns)
-
-
-@pytest.mark.parametrize(
- ("time_series", "columns", "error_msg"),
- [
- (
- TimeSeries(
- {"time": [0, 1, 2], "A": [1, 2, 3], "B": [4, 5, 6]},
- target_name="B",
- time_name="time",
- feature_names=["A"],
- ),
- [Column("A", [7, 8, 9]), Column("D", [10, 11, 12])],
- r"Column 'A' already exists.",
- ),
- ],
- ids=["column_already_exist"],
-)
-def test_add_columns_raise_duplicate_column_name_if_column_already_exist(
- time_series: TimeSeries,
- columns: list[Column] | Table,
- error_msg: str,
-) -> None:
- with pytest.raises(DuplicateColumnNameError, match=error_msg):
- time_series.add_columns_as_features(columns)
-
-
-@pytest.mark.parametrize(
- ("time_series", "columns", "error_msg"),
- [
- (
- TimeSeries(
- {"time": [0, 1, 2], "A": [1, 2, 3], "B": [4, 5, 6]},
- target_name="B",
- time_name="time",
- feature_names=["A"],
- ),
- [Column("C", [5, 7, 8, 9]), Column("D", [4, 10, 11, 12])],
- r"Expected a column of size 3 but got column of size 4.",
- ),
- ],
- ids=["columns_are_oversize"],
-)
-def test_should_raise_column_size_error_if_columns_are_oversize(
- time_series: TimeSeries,
- columns: list[Column] | Table,
- error_msg: str,
-) -> None:
- with pytest.raises(ColumnSizeError, match=error_msg):
- time_series.add_columns_as_features(columns)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_add_row.py b/tests/safeds/data/tabular/containers/_time_series/test_add_row.py
deleted file mode 100644
index 8ad2572a7..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_add_row.py
+++ /dev/null
@@ -1,82 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import Row, TimeSeries
-from safeds.exceptions import UnknownColumnNameError
-
-from tests.helpers import assert_that_time_series_are_equal
-
-
-@pytest.mark.parametrize(
- ("time_series", "row", "expected"),
- [
- (
- TimeSeries(
- {
- "time": [0, 1],
- "feature": [0, 1],
- "target": [3, 4],
- },
- "target",
- "time",
- ),
- Row(
- {
- "time": 2,
- "feature": 2,
- "target": 5,
- },
- ),
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature": [0, 1, 2],
- "target": [3, 4, 5],
- },
- "target",
- "time",
- ),
- ),
- ],
- ids=["add_row"],
-)
-def test_should_add_row(time_series: TimeSeries, row: Row, expected: TimeSeries) -> None:
- assert_that_time_series_are_equal(time_series.add_row(row), expected)
-
-
-@pytest.mark.parametrize(
- ("time_series", "row", "error_msg"),
- [
- (
- TimeSeries({"time": [], "feature": [], "target": []}, "target", "time", ["feature"]),
- Row({"feat": None, "targ": None}),
- r"Could not find column\(s\) 'time, feature, target'.",
- ),
- ],
- ids=["columns_missing"],
-)
-def test_should_raise_an_error_if_row_schema_invalid(
- time_series: TimeSeries,
- row: Row,
- error_msg: str,
-) -> None:
- with pytest.raises(UnknownColumnNameError, match=error_msg):
- time_series.add_row(row)
-
-
-# the original tests throw a warning here aswell( test_add_row in tabular_dataset)
-@pytest.mark.parametrize(
- ("time_series", "row", "expected_time_series"),
- [
- (
- TimeSeries({"time": [], "feature": [], "target": []}, "target", "time"),
- Row({"time": 0, "feature": 2, "target": 5}),
- TimeSeries({"time": [0], "feature": [2], "target": [5]}, "target", "time"),
- ),
- ],
- ids=["empty_feature_column"],
-)
-def test_should_add_row_to_empty_table(
- time_series: TimeSeries,
- row: Row,
- expected_time_series: TimeSeries,
-) -> None:
- assert_that_time_series_are_equal(time_series.add_row(row), expected_time_series)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_add_rows.py b/tests/safeds/data/tabular/containers/_time_series/test_add_rows.py
deleted file mode 100644
index 641a2ec05..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_add_rows.py
+++ /dev/null
@@ -1,65 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import Row, Table, TimeSeries
-from safeds.exceptions import UnknownColumnNameError
-
-from tests.helpers import assert_that_time_series_are_equal
-
-
-@pytest.mark.parametrize(
- ("time_series", "rows", "expected"),
- [
- (
- TimeSeries(
- {
- "time": [0, 1],
- "feature": [0, 1],
- "target": [4, 5],
- },
- "target",
- "time",
- ),
- [
- Row(
- {
- "time": 2,
- "feature": 2,
- "target": 6,
- },
- ),
- Row({"time": 3, "feature": 3, "target": 7}),
- ],
- TimeSeries(
- {
- "time": [0, 1, 2, 3],
- "feature": [0, 1, 2, 3],
- "target": [4, 5, 6, 7],
- },
- "target",
- "time",
- ),
- ),
- ],
- ids=["add_rows"],
-)
-def test_should_add_rows(time_series: TimeSeries, rows: list[Row], expected: TimeSeries) -> None:
- assert_that_time_series_are_equal(time_series.add_rows(rows), expected)
-
-
-@pytest.mark.parametrize(
- ("time_series", "rows", "error_msg"),
- [
- (
- TimeSeries({"time": [], "feature": [], "target": []}, "target", "time", ["feature"]),
- [Row({"feat": None, "targ": None}), Row({"targ": None, "feat": None})],
- r"Could not find column\(s\) 'time, feature, target'.",
- ),
- ],
- ids=["columns_missing"],
-)
-def test_should_raise_an_error_if_rows_schemas_are_invalid(
- time_series: TimeSeries,
- rows: list[Row] | Table,
- error_msg: str,
-) -> None:
- with pytest.raises(UnknownColumnNameError, match=error_msg):
- time_series.add_rows(rows)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_eq.py b/tests/safeds/data/tabular/containers/_time_series/test_eq.py
deleted file mode 100644
index 0e39f828f..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_eq.py
+++ /dev/null
@@ -1,101 +0,0 @@
-from typing import Any
-
-import pytest
-from safeds.data.labeled.containers import TabularDataset
-from safeds.data.tabular.containers import Row, Table, TimeSeries
-
-
-@pytest.mark.parametrize(
- ("table1", "table2", "expected"),
- [
- (
- TimeSeries({"a": [], "b": [], "c": []}, "b", "c", ["a"]),
- TimeSeries({"a": [], "b": [], "c": []}, "b", "c", ["a"]),
- True,
- ),
- (
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]),
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]),
- True,
- ),
- (
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "d", ["a"]),
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "c", "d", ["a"]),
- False,
- ),
- (
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "c", ["a"]),
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "e": [10, 11, 12]}, "b", "c", ["a"]),
- False,
- ),
- (
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]),
- TimeSeries({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]),
- False,
- ),
- (
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]),
- TimeSeries({"a": ["1", "2", "3"], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]),
- False,
- ),
- (
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "d", ["a"]),
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "d", ["c"]),
- False,
- ),
- (
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "d", ["a"]),
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "c", ["a"]),
- False,
- ),
- ],
- ids=[
- "rowless table",
- "equal tables",
- "different target",
- "different column names",
- "different values",
- "different types",
- "different features",
- "different time",
- ],
-)
-def test_should_return_whether_two_tabular_datasets_are_equal(
- table1: TimeSeries,
- table2: TimeSeries,
- expected: bool,
-) -> None:
- assert (table1.__eq__(table2)) == expected
-
-
-@pytest.mark.parametrize(
- "table1",
- [TimeSeries({"a": [], "b": [], "c": []}, "b", "c", ["a"])],
- ids=[
- "any",
- ],
-)
-def test_should_return_true_if_objects_are_identical(table1: TimeSeries) -> None:
- assert (table1.__eq__(table1)) is True
-
-
-@pytest.mark.parametrize(
- ("table", "other"),
- [
- (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]), None),
- (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]), Row()),
- (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]), Table()),
- (
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]),
- TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b"),
- ),
- ],
- ids=[
- "TimeSeries vs. None",
- "TimeSeries vs. Row",
- "TimeSeries vs. Table",
- "TimeSeries vs. TabularDataset",
- ],
-)
-def test_should_return_not_implemented_if_other_is_not_time_series(table: TimeSeries, other: Any) -> None:
- assert (table.__eq__(other)) is NotImplemented
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_filter_rows.py b/tests/safeds/data/tabular/containers/_time_series/test_filter_rows.py
deleted file mode 100644
index a7d38e257..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_filter_rows.py
+++ /dev/null
@@ -1,124 +0,0 @@
-from collections.abc import Callable
-
-import pytest
-from safeds.data.tabular.containers import Row, TimeSeries
-
-from tests.helpers import assert_that_time_series_are_equal
-
-
-@pytest.mark.parametrize(
- ("time_series", "expected", "query"),
- [
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_1": [3, 9, 6],
- "feature_2": [6, 12, 9],
- "target": [1, 3, 2],
- },
- "target",
- "time",
- ),
- TimeSeries(
- {
- "time": [0, 2],
- "feature_1": [3, 6],
- "feature_2": [6, 9],
- "target": [1, 2],
- },
- "target",
- "time",
- ),
- lambda row: all(row.get_value(col) < 10 for col in row.column_names),
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2, 3],
- "feature_1": [3, 9, 6, 2],
- "feature_2": [6, 12, 9, 3],
- "other": [1, 2, 3, 10],
- "target": [1, 3, 2, 4],
- },
- "target",
- "time",
- ["feature_1", "feature_2"],
- ),
- TimeSeries(
- {
- "time": [
- 0,
- 2,
- ],
- "feature_1": [3, 6],
- "feature_2": [6, 9],
- "other": [1, 3],
- "target": [1, 2],
- },
- "target",
- "time",
- ["feature_1", "feature_2"],
- ),
- lambda row: all(row.get_value(col) < 10 for col in row.column_names),
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_1": [3, 9, 6],
- "feature_2": [6, 12, 9],
- "target": [1, 3, 2],
- },
- "target",
- "time",
- ),
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_1": [3, 9, 6],
- "feature_2": [6, 12, 9],
- "target": [1, 3, 2],
- },
- "target",
- "time",
- ),
- lambda row: all(row.get_value(col) < 20 for col in row.column_names),
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2, 3],
- "feature_1": [3, 9, 6, 2],
- "feature_2": [6, 12, 9, 3],
- "other": [1, 2, 3, 10],
- "target": [1, 3, 2, 4],
- },
- "target",
- "time",
- ["feature_1", "feature_2"],
- ),
- TimeSeries(
- {
- "time": [0, 1, 2, 3],
- "feature_1": [3, 9, 6, 2],
- "feature_2": [6, 12, 9, 3],
- "other": [1, 2, 3, 10],
- "target": [1, 3, 2, 4],
- },
- "target",
- "time",
- ["feature_1", "feature_2"],
- ),
- lambda row: all(row.get_value(col) < 20 for col in row.column_names),
- ),
- ],
- ids=[
- "remove_rows_with_values_greater_9",
- "remove_rows_with_values_greater_9_non_feature_columns",
- "remove_no_rows",
- "remove_no_rows_non_feature_columns",
- ],
-)
-def test_should_filter_rows(time_series: TimeSeries, expected: TimeSeries, query: Callable[[Row], bool]) -> None:
- assert_that_time_series_are_equal(time_series.filter_rows(query), expected)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_from_table_to_time_series.py b/tests/safeds/data/tabular/containers/_time_series/test_from_table_to_time_series.py
deleted file mode 100644
index b404d4c18..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_from_table_to_time_series.py
+++ /dev/null
@@ -1,187 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import Table, TimeSeries
-from safeds.exceptions import UnknownColumnNameError
-
-
-@pytest.mark.parametrize(
- ("table", "target_name", "time_name", "feature_names", "error", "error_msg"),
- [
- (
- Table(
- {
- "time": [0, 1],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- ),
- "T",
- "time",
- ["A", "B", "C", "D", "E"],
- UnknownColumnNameError,
- r"Could not find column\(s\) 'D, E'",
- ),
- (
- Table(
- {
- "time": [0, 1],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- ),
- "D",
- "time",
- ["A", "B", "C"],
- UnknownColumnNameError,
- r"Could not find column\(s\) 'D'",
- ),
- (
- Table(
- {
- "time": [0, 1],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- ),
- "A",
- "time",
- ["A", "B", "C"],
- ValueError,
- r"Column 'A' can not be target and feature column.",
- ),
- (
- Table(
- {
- "r": [0, 1],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- ),
- "T",
- "time",
- ["A", "B", "C"],
- UnknownColumnNameError,
- r"Could not find column\(s\) 'time'",
- ),
- (
- Table(
- {
- "time": [0, 1],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- ),
- "T",
- "A",
- ["A", "B", "C"],
- ValueError,
- r"Column 'A' can not be time and feature column.",
- ),
- ],
- ids=[
- "feature_does_not_exist",
- "target_does_not_exist",
- "target_and_feature_overlap",
- "time_does_not_exist",
- "time_is_also_feature",
- ],
-)
-def test_should_raise_error(
- table: Table,
- target_name: str,
- time_name: str,
- feature_names: list[str] | None,
- error: type[Exception],
- error_msg: str,
-) -> None:
- with pytest.raises(error, match=error_msg):
- TimeSeries._from_table(
- table,
- target_name=target_name,
- time_name=time_name,
- feature_names=feature_names,
- )
-
-
-@pytest.mark.parametrize(
- ("table", "target_name", "time_name", "feature_names"),
- [
- (
- Table(
- {
- "time": [0, 1],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- ),
- "T",
- "time",
- ["A", "B", "C"],
- ),
- (
- Table(
- {
- "time": [0, 1],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- ),
- "T",
- "time",
- ["A", "C"],
- ),
- (
- Table(
- {
- "time": [0, 1],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- ),
- "T",
- "time",
- ["B"],
- ),
- ],
- ids=[
- "create_tabular_dataset",
- "tabular_dataset_not_all_columns_are_features",
- "tabular_dataset_with_feature_names_as_None",
- ],
-)
-def test_should_create_a_tabular_dataset(
- table: Table,
- target_name: str,
- time_name: str,
- feature_names: list[str] | None,
-) -> None:
- time_series = TimeSeries._from_table(
- table,
- target_name=target_name,
- time_name=time_name,
- feature_names=feature_names,
- )
- feature_names = (
- feature_names if feature_names is not None else table.remove_columns([target_name, time_name]).column_names
- )
- assert isinstance(time_series, TimeSeries)
- assert time_series._features.column_names == feature_names
- assert time_series._target.name == target_name
- assert time_series._features == table.keep_only_columns(feature_names)
- assert time_series._target == table.get_column(target_name)
- assert time_series.time == table.get_column(time_name)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_hash.py b/tests/safeds/data/tabular/containers/_time_series/test_hash.py
deleted file mode 100644
index 94015139b..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_hash.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import TimeSeries
-
-
-@pytest.mark.parametrize(
- ("table1", "table2"),
- [
- (
- TimeSeries({"a": [], "b": [], "c": []}, "b", "c", ["a"]),
- TimeSeries({"a": [], "b": [], "c": []}, "b", "c", ["a"]),
- ),
- (
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]),
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]),
- ),
- (
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]),
- TimeSeries({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]),
- ),
- ],
- ids=[
- "rowless table",
- "equal tables",
- "different values",
- ],
-)
-def test_should_return_same_hash_for_equal_time_series(table1: TimeSeries, table2: TimeSeries) -> None:
- assert hash(table1) == hash(table2)
-
-
-@pytest.mark.parametrize(
- ("table1", "table2"),
- [
- (
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "d", ["a"]),
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "c", "d", ["a"]),
- ),
- (
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "c", ["a"]),
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "e": [10, 11, 12]}, "b", "c", ["a"]),
- ),
- (
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]),
- TimeSeries({"a": ["1", "2", "3"], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]),
- ),
- (
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "d", ["a"]),
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "d", ["c"]),
- ),
- (
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "d", ["a"]),
- TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "c", ["a"]),
- ),
- ],
- ids=[
- "different target",
- "different column names",
- "different types",
- "different features",
- "different time",
- ],
-)
-def test_should_return_different_hash_for_unequal_time_series(table1: TimeSeries, table2: TimeSeries) -> None:
- assert hash(table1) != hash(table2)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_init.py b/tests/safeds/data/tabular/containers/_time_series/test_init.py
deleted file mode 100644
index c46801cce..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_init.py
+++ /dev/null
@@ -1,161 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import Table, TimeSeries
-from safeds.exceptions import UnknownColumnNameError
-
-
-@pytest.mark.parametrize(
- ("data", "time_name", "target_name", "feature_names", "error", "error_msg"),
- [
- (
- {
- "time": [0, 1],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- "time",
- "T",
- ["A", "B", "C", "D", "E"],
- UnknownColumnNameError,
- r"Could not find column\(s\) 'D, E'",
- ),
- (
- {
- "time": [0, 1],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- "time",
- "D",
- ["A", "B", "C"],
- UnknownColumnNameError,
- r"Could not find column\(s\) 'D'",
- ),
- (
- {
- "time": [0, 1],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- "time",
- "A",
- ["A", "B", "C"],
- ValueError,
- r"Column 'A' can not be time and feature column.",
- ),
- (
- {
- "time": [0, 1],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- "random",
- "B",
- ["A"],
- UnknownColumnNameError,
- r"Could not find column\(s\) 'random'.",
- ),
- (
- {
- "time": [0, 1],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- "time",
- "T",
- ["A", "B", "C", "time"],
- ValueError,
- "Column 'time' can not be time and feature column.",
- ),
- ],
- ids=[
- "feature_does_not_exist",
- "target_does_not_exist",
- "target_and_feature_overlap",
- "time_column_does_not_exist",
- "time_is_also_feature",
- ],
-)
-def test_should_raise_error(
- data: dict[str, list[int]],
- time_name: str,
- target_name: str,
- feature_names: list[str] | None,
- error: type[Exception],
- error_msg: str,
-) -> None:
- with pytest.raises(error, match=error_msg):
- TimeSeries(data, target_name=target_name, time_name=time_name, feature_names=feature_names)
-
-
-@pytest.mark.parametrize(
- ("data", "time_name", "target_name", "feature_names"),
- [
- (
- {
- "time": [0, 1],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- "time",
- "T",
- ["A", "B", "C"],
- ),
- (
- {
- "time": [0, 1],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- "time",
- "T",
- ["A", "C"],
- ),
- (
- {
- "time": [0, 1],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- "time",
- "T",
- None,
- ),
- ],
- ids=[
- "create_tabular_dataset",
- "tabular_dataset_not_all_columns_are_features",
- "tabular_dataset_with_feature_names_as_None",
- ],
-)
-def test_should_create_a_time_series(
- data: dict[str, list[int]],
- time_name: str,
- target_name: str,
- feature_names: list[str] | None,
-) -> None:
- time_series = TimeSeries(data, target_name=target_name, time_name=time_name, feature_names=feature_names)
- if feature_names is None:
- feature_names = []
-
- assert isinstance(time_series, TimeSeries)
- assert time_series._feature_names == feature_names
- assert time_series._target.name == target_name
- assert time_series._features == Table(data).keep_only_columns(feature_names)
- assert time_series._target == Table(data).get_column(target_name)
- assert time_series.time == Table(data).get_column(time_name)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_keep_only_columns.py b/tests/safeds/data/tabular/containers/_time_series/test_keep_only_columns.py
deleted file mode 100644
index c6a7ac051..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_keep_only_columns.py
+++ /dev/null
@@ -1,147 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import Table, TimeSeries
-from safeds.exceptions import IllegalSchemaModificationError
-
-from tests.helpers import assert_that_time_series_are_equal
-
-
-@pytest.mark.parametrize(
- ("table", "column_names", "expected"),
- [
- (
- TimeSeries._from_table(
- Table(
- {
- "time": [0, 1, 2],
- "feat1": [1, 2, 3],
- "feat2": [4, 5, 6],
- "target": [7, 8, 9],
- },
- ),
- "target",
- "time",
- ),
- ["feat1", "target", "time"],
- TimeSeries._from_table(
- Table(
- {
- "time": [0, 1, 2],
- "feat1": [1, 2, 3],
- "target": [7, 8, 9],
- },
- ),
- "target",
- "time",
- ),
- ),
- (
- TimeSeries._from_table(
- Table(
- {
- "time": [0, 1, 2],
- "feat1": [1, 2, 3],
- "feat2": [4, 5, 6],
- "other": [3, 4, 5],
- "target": [7, 8, 9],
- },
- ),
- "target",
- "time",
- ),
- ["feat1", "other", "target", "time"],
- TimeSeries._from_table(
- Table(
- {
- "time": [0, 1, 2],
- "feat1": [1, 2, 3],
- "other": [3, 4, 5],
- "target": [7, 8, 9],
- },
- ),
- "target",
- "time",
- ),
- ),
- (
- TimeSeries._from_table(
- Table(
- {
- "time": [0, 1, 2],
- "feat1": [1, 2, 3],
- "feat2": [4, 5, 6],
- "other": [3, 4, 5],
- "target": [7, 8, 9],
- },
- ),
- "target",
- "time",
- ),
- ["feat1", "target", "time"],
- TimeSeries._from_table(
- Table(
- {
- "time": [0, 1, 2],
- "feat1": [1, 2, 3],
- "target": [7, 8, 9],
- },
- ),
- "target",
- "time",
- ),
- ),
- ],
- ids=["keep_feature_and_target_column", "keep_non_feature_column", "don't_keep_non_feature_column"],
-)
-def test_should_return_table(table: TimeSeries, column_names: list[str], expected: TimeSeries) -> None:
- new_table = table.keep_only_columns(column_names)
- assert_that_time_series_are_equal(new_table, expected)
-
-
-@pytest.mark.parametrize(
- ("table", "column_names", "error_msg"),
- [
- (
- TimeSeries._from_table(
- Table(
- {
- "time": [0, 1, 2],
- "feat1": [1, 2, 3],
- "feat2": [4, 5, 6],
- "other": [3, 5, 7],
- "target": [7, 8, 9],
- },
- ),
- "target",
- "time",
- ["feat1", "feat2"],
- ),
- ["feat1", "feat2"],
- r"Illegal schema modification: Must keep the target column.",
- ),
- (
- TimeSeries._from_table(
- Table(
- {
- "time": [0, 1, 2],
- "feat1": [1, 2, 3],
- "feat2": [4, 5, 6],
- "other": [3, 5, 7],
- "target": [7, 8, 9],
- },
- ),
- "target",
- "time",
- ["feat1", "feat2"],
- ),
- ["target", "feat1", "other"],
- r"Illegal schema modification: Must keep the time column.",
- ),
- ],
- ids=["table_remove_target", "table_remove_time"],
-)
-def test_should_raise_illegal_schema_modification(table: TimeSeries, column_names: list[str], error_msg: str) -> None:
- with pytest.raises(
- IllegalSchemaModificationError,
- match=error_msg,
- ):
- table.keep_only_columns(column_names)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_plot_compare_time_series.py b/tests/safeds/data/tabular/containers/_time_series/test_plot_compare_time_series.py
deleted file mode 100644
index 4d114cd55..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_plot_compare_time_series.py
+++ /dev/null
@@ -1,113 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import TimeSeries
-from safeds.exceptions import NonNumericColumnError
-from syrupy import SnapshotAssertion
-
-
-def create_time_series_list() -> list[TimeSeries]:
- table1 = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "target": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- table2 = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
- "target": [4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- return [table1, table2]
-
-
-def create_invalid_time_series_list() -> list[TimeSeries]:
- table1 = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "target": ["9", 10, 11, 12, 13, 14, 15, 16, 17, 18],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- table2 = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
- "target": ["4", 5, 6, 7, 8, 9, 10, 11, 12, 13],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- return [table1, table2]
-
-
-def test_legit_compare(snapshot_png_image: SnapshotAssertion) -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- plot = table.plot_compare_time_series(create_time_series_list())
- assert plot == snapshot_png_image
-
-
-def test_should_raise_if_column_contains_non_numerical_values_x() -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "target": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- with pytest.raises(
- NonNumericColumnError,
- match=(
- r"Tried to do a numerical operation on one or multiple non-numerical columns: \nThe time series plotted"
- r" column"
- r" contains"
- r" non-numerical columns."
- ),
- ):
- table.plot_compare_time_series(create_time_series_list())
-
-
-def test_with_non_valid_list() -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- with pytest.raises(
- NonNumericColumnError,
- match=(
- r"Tried to do a numerical operation on one or multiple non-numerical columns: \nThe time series plotted"
- r" column"
- r" contains"
- r" non-numerical columns."
- ),
- ):
- table.plot_compare_time_series(create_invalid_time_series_list())
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_plot_lag.py b/tests/safeds/data/tabular/containers/_time_series/test_plot_lag.py
deleted file mode 100644
index 29c69a2e3..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_plot_lag.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import TimeSeries
-from safeds.exceptions import NonNumericColumnError
-from syrupy import SnapshotAssertion
-
-
-def test_should_return_table(snapshot_png_image: SnapshotAssertion) -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- lag_plot = table.plot_lagplot(lag=1)
- assert lag_plot == snapshot_png_image
-
-
-def test_should_raise_if_column_contains_non_numerical_values() -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "target": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- with pytest.raises(
- NonNumericColumnError,
- match=(
- r"Tried to do a numerical operation on one or multiple non-numerical columns: \nThis time series target"
- r" contains"
- r" non-numerical columns."
- ),
- ):
- table.plot_lagplot(2)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_plot_lineplot.py b/tests/safeds/data/tabular/containers/_time_series/test_plot_lineplot.py
deleted file mode 100644
index ff3ad83c0..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_plot_lineplot.py
+++ /dev/null
@@ -1,265 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import TimeSeries
-from safeds.exceptions import NonNumericColumnError, UnknownColumnNameError
-from syrupy import SnapshotAssertion
-
-
-def test_should_return_table(snapshot_png_image: SnapshotAssertion) -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- plot = table.plot_lineplot()
- assert plot == snapshot_png_image
-
-
-def test_should_raise_if_column_contains_non_numerical_values_x() -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "target": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- with pytest.raises(
- NonNumericColumnError,
- match=(
- r"Tried to do a numerical operation on one or multiple non-numerical columns: \nThe time series plotted"
- r" column"
- r" contains"
- r" non-numerical columns."
- ),
- ):
- table.plot_lineplot(x_column_name="feature_1")
-
-
-def test_should_return_table_both(snapshot_png_image: SnapshotAssertion) -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- plot = table.plot_lineplot(x_column_name="feature_1", y_column_name="target")
- assert plot == snapshot_png_image
-
-
-def test_should_plot_feature_y(snapshot_png_image: SnapshotAssertion) -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [10, 9, 8, 7, 6, 5, 4, 3, 2, 1],
- "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- plot = table.plot_lineplot(y_column_name="feature_1")
- assert plot == snapshot_png_image
-
-
-def test_should_plot_feature_x(snapshot_png_image: SnapshotAssertion) -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [10, 9, 8, 7, 6, 5, 4, 3, 2, 1],
- "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- plot = table.plot_lineplot(x_column_name="feature_1")
- assert plot == snapshot_png_image
-
-
-def test_should_plot_feature(snapshot_png_image: SnapshotAssertion) -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [2, 4, 6, 8, 10, 12, 14, 16, 18, 20],
- "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- plot = table.plot_lineplot(x_column_name="feature_1")
- assert plot == snapshot_png_image
-
-
-def test_should_raise_if_column_contains_non_numerical_values() -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "target": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- with pytest.raises(
- NonNumericColumnError,
- match=(
- r"Tried to do a numerical operation on one or multiple non-numerical columns: \nThe time series plotted"
- r" column"
- r" contains"
- r" non-numerical columns."
- ),
- ):
- table.plot_lineplot(x_column_name="target")
-
-
-@pytest.mark.parametrize(
- ("time_series", "name", "error", "error_msg"),
- [
- (
- TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- "target": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- ),
- "feature_1",
- NonNumericColumnError,
- r"Tried to do a numerical operation on one or multiple non-numerical columns: \nThe time series plotted"
- r" column"
- r" contains"
- r" non-numerical columns.",
- ),
- (
- TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- "target": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- ),
- "feature_3",
- UnknownColumnNameError,
- r"Could not find column\(s\) 'feature_3'.",
- ),
- ],
- ids=["feature_not_numerical", "feature_does_not_exist"],
-)
-def test_should_raise_error_optional_parameter(
- time_series: TimeSeries,
- name: str,
- error: type[Exception],
- error_msg: str,
-) -> None:
- with pytest.raises(
- error,
- match=error_msg,
- ):
- time_series.plot_lineplot(x_column_name=name)
-
-
-@pytest.mark.parametrize(
- ("time_series", "name", "error", "error_msg"),
- [
- (
- TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- "target": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- ),
- "feature_1",
- NonNumericColumnError,
- r"Tried to do a numerical operation on one or multiple non-numerical columns: \nThe time series plotted"
- r" column"
- r" contains"
- r" non-numerical columns.",
- ),
- (
- TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- "target": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- ),
- "feature_3",
- UnknownColumnNameError,
- r"Could not find column\(s\) 'feature_3'.",
- ),
- ],
- ids=["feature_not_numerical", "feature_does_not_exist"],
-)
-def test_should_raise_error_optional_parameter_y(
- time_series: TimeSeries,
- name: str,
- error: type[Exception],
- error_msg: str,
-) -> None:
- with pytest.raises(
- error,
- match=error_msg,
- ):
- time_series.plot_lineplot(y_column_name=name)
-
-
-def test_should_raise_if_column_does_not_exist_x() -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- with pytest.raises(
- UnknownColumnNameError,
- match=r"Could not find column\(s\) '2'.",
- ):
- table.plot_lineplot(x_column_name="target", y_column_name="2")
-
-
-def test_should_raise_if_column_does_not_exist_y() -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- with pytest.raises(
- UnknownColumnNameError,
- match=r"Could not find column\(s\) '2'.",
- ):
- table.plot_lineplot(x_column_name="2", y_column_name="target")
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_plot_scatterplot.py b/tests/safeds/data/tabular/containers/_time_series/test_plot_scatterplot.py
deleted file mode 100644
index 739e9d135..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_plot_scatterplot.py
+++ /dev/null
@@ -1,265 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import TimeSeries
-from safeds.exceptions import NonNumericColumnError, UnknownColumnNameError
-from syrupy import SnapshotAssertion
-
-
-def test_should_return_table(snapshot_png_image: SnapshotAssertion) -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- plot = table.plot_scatterplot()
- assert plot == snapshot_png_image
-
-
-def test_should_plot_feature(snapshot_png_image: SnapshotAssertion) -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [10, 9, 8, 7, 6, 5, 4, 3, 2, 1],
- "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- plot = table.plot_scatterplot(y_column_name="feature_1")
- assert plot == snapshot_png_image
-
-
-def test_should_plot_feature_only_x(snapshot_png_image: SnapshotAssertion) -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [10, 9, 8, 7, 6, 5, 4, 3, 2, 1],
- "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- plot = table.plot_scatterplot(x_column_name="feature_1")
- assert plot == snapshot_png_image
-
-
-def test_should_plot_feature_only_y_optional(snapshot_png_image: SnapshotAssertion) -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [10, 9, 8, 7, 6, 5, 4, 3, 2, 1],
- "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- plot = table.plot_scatterplot(y_column_name="feature_1")
- assert plot == snapshot_png_image
-
-
-def test_should_plot_feature_both_set(snapshot_png_image: SnapshotAssertion) -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [1, 2, 1, 2, 1, 2, 1, 2, 1, 1],
- "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- plot = table.plot_scatterplot(x_column_name="feature_1", y_column_name="target")
- assert plot == snapshot_png_image
-
-
-def test_should_raise_if_column_contains_non_numerical_values() -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- with pytest.raises(
- NonNumericColumnError,
- match=(
- r"Tried to do a numerical operation on one or multiple non-numerical columns: \nThe time series plotted"
- r" column"
- r" contains"
- r" non-numerical columns."
- ),
- ):
- table.plot_scatterplot(y_column_name="feature_1")
-
-
-def test_should_raise_if_column_contains_non_numerical_values_x() -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "target": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- with pytest.raises(
- NonNumericColumnError,
- match=(
- r"Tried to do a numerical operation on one or multiple non-numerical columns: \nThe time series plotted"
- r" column"
- r" contains"
- r" non-numerical columns."
- ),
- ):
- table.plot_scatterplot(x_column_name="feature_1")
-
-
-@pytest.mark.parametrize(
- ("time_series", "name", "error", "error_msg"),
- [
- (
- TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- "target": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- ),
- "feature_1",
- NonNumericColumnError,
- r"Tried to do a numerical operation on one or multiple non-numerical columns: \nThe time series plotted"
- r" column"
- r" contains"
- r" non-numerical columns.",
- ),
- (
- TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- "target": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- ),
- "feature_3",
- UnknownColumnNameError,
- r"Could not find column\(s\) 'feature_3'.",
- ),
- ],
- ids=["feature_not_numerical", "feature_does_not_exist"],
-)
-def test_should_raise_error_optional_parameter(
- time_series: TimeSeries,
- name: str,
- error: type[Exception],
- error_msg: str,
-) -> None:
- with pytest.raises(
- error,
- match=error_msg,
- ):
- time_series.plot_scatterplot(x_column_name=name)
-
-
-@pytest.mark.parametrize(
- ("time_series", "name", "error", "error_msg"),
- [
- (
- TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- "target": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- ),
- "feature_1",
- NonNumericColumnError,
- r"Tried to do a numerical operation on one or multiple non-numerical columns: \nThe time series plotted"
- r" column"
- r" contains"
- r" non-numerical columns.",
- ),
- (
- TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- "target": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- ),
- "feature_3",
- UnknownColumnNameError,
- r"Could not find column\(s\) 'feature_3'.",
- ),
- ],
- ids=["feature_not_numerical", "feature_does_not_exist"],
-)
-def test_should_raise_error_optional_parameter_y(
- time_series: TimeSeries,
- name: str,
- error: type[Exception],
- error_msg: str,
-) -> None:
- with pytest.raises(
- error,
- match=error_msg,
- ):
- time_series.plot_scatterplot(y_column_name=name)
-
-
-def test_should_raise_if_column_does_not_exist_y() -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- with pytest.raises(
- UnknownColumnNameError,
- match=r"Could not find column\(s\) '2'.",
- ):
- table.plot_scatterplot(x_column_name="target", y_column_name="2")
-
-
-def test_should_raise_if_column_does_not_exist_x() -> None:
- table = TimeSeries(
- {
- "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- },
- target_name="target",
- time_name="time",
- feature_names=None,
- )
- with pytest.raises(
- UnknownColumnNameError,
- match=r"Could not find column\(s\) '2'.",
- ):
- table.plot_scatterplot(x_column_name="2")
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_remove_columns.py b/tests/safeds/data/tabular/containers/_time_series/test_remove_columns.py
deleted file mode 100644
index 5a51e70e1..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_remove_columns.py
+++ /dev/null
@@ -1,205 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import Table, TimeSeries
-from safeds.exceptions import ColumnIsTargetError, ColumnIsTimeError
-
-from tests.helpers import assert_that_time_series_are_equal
-
-
-@pytest.mark.parametrize(
- ("table", "columns", "expected"),
- [
- (
- TimeSeries._from_table(
- Table(
- {
- "time": [0, 1, 2],
- "feat_1": [1, 2, 3],
- "feat_2": [4, 5, 6],
- "non_feat_1": [2, 4, 6],
- "non_feat_2": [3, 6, 9],
- "target": [7, 8, 9],
- },
- ),
- "target",
- "time",
- ["feat_1", "feat_2"],
- ),
- ["feat_2"],
- TimeSeries._from_table(
- Table({
- "time": [0, 1, 2],
- "feat_1": [1, 2, 3],
- "non_feat_1": [2, 4, 6],
- "non_feat_2": [3, 6, 9],
- "target": [7, 8, 9],
- }),
- "target",
- "time",
- ["feat_1"],
- ),
- ),
- (
- TimeSeries._from_table(
- Table(
- {
- "time": [0, 1, 2],
- "feat_1": [1, 2, 3],
- "feat_2": [4, 5, 6],
- "non_feat_1": [2, 4, 6],
- "non_feat_2": [3, 6, 9],
- "target": [7, 8, 9],
- },
- ),
- "target",
- "time",
- ["feat_1", "feat_2"],
- ),
- ["non_feat_2"],
- TimeSeries._from_table(
- Table({
- "time": [0, 1, 2],
- "feat_1": [1, 2, 3],
- "feat_2": [4, 5, 6],
- "non_feat_1": [2, 4, 6],
- "target": [7, 8, 9],
- }),
- "target",
- "time",
- ["feat_1", "feat_2"],
- ),
- ),
- (
- TimeSeries._from_table(
- Table(
- {
- "time": [0, 1, 2],
- "feat_1": [1, 2, 3],
- "feat_2": [4, 5, 6],
- "non_feat_1": [2, 4, 6],
- "non_feat_2": [3, 6, 9],
- "target": [7, 8, 9],
- },
- ),
- "target",
- "time",
- ["feat_1", "feat_2"],
- ),
- ["non_feat_1", "non_feat_2"],
- TimeSeries._from_table(
- Table({"time": [0, 1, 2], "feat_1": [1, 2, 3], "feat_2": [4, 5, 6], "target": [7, 8, 9]}),
- "target",
- "time",
- ["feat_1", "feat_2"],
- ),
- ),
- (
- TimeSeries._from_table(
- Table(
- {
- "time": [0, 1, 2],
- "feat_1": [1, 2, 3],
- "feat_2": [4, 5, 6],
- "non_feat_1": [2, 4, 6],
- "non_feat_2": [3, 6, 9],
- "target": [7, 8, 9],
- },
- ),
- "target",
- "time",
- ["feat_1", "feat_2"],
- ),
- ["feat_2", "non_feat_2"],
- TimeSeries._from_table(
- Table({"time": [0, 1, 2], "feat_1": [1, 2, 3], "non_feat_1": [2, 4, 6], "target": [7, 8, 9]}),
- "target",
- "time",
- ["feat_1"],
- ),
- ),
- (
- TimeSeries._from_table(
- Table(
- {
- "time": [0, 1, 2],
- "feat_1": [1, 2, 3],
- "non_feat_1": [2, 4, 6],
- "target": [7, 8, 9],
- },
- ),
- "target",
- "time",
- ["feat_1"],
- ),
- [],
- TimeSeries._from_table(
- Table({"time": [0, 1, 2], "feat_1": [1, 2, 3], "non_feat_1": [2, 4, 6], "target": [7, 8, 9]}),
- "target",
- "time",
- ["feat_1"],
- ),
- ),
- ],
- ids=[
- "remove_feature",
- "remove_non_feature",
- "remove_all_non_features",
- "remove_some_feat_and_some_non_feat",
- "remove_nothing",
- ],
-)
-def test_should_remove_columns(table: TimeSeries, columns: list[str], expected: TimeSeries) -> None:
- new_table = table.remove_columns(columns)
- assert_that_time_series_are_equal(new_table, expected)
-
-
-@pytest.mark.parametrize(
- ("table", "columns", "error", "error_msg"),
- [
- (
- TimeSeries._from_table(
- Table({"time": [0, 1, 2], "feat": [1, 2, 3], "non_feat": [1, 2, 3], "target": [4, 5, 6]}),
- "target",
- "time",
- ["feat"],
- ),
- ["target"],
- ColumnIsTargetError,
- r'Illegal schema modification: Column "target" is the target column and cannot be removed.',
- ),
- (
- TimeSeries._from_table(
- Table({"time": [0, 1, 2], "feat": [1, 2, 3], "non_feat": [1, 2, 3], "target": [4, 5, 6]}),
- "target",
- "time",
- ["feat"],
- ),
- ["non_feat", "target"],
- ColumnIsTargetError,
- r'Illegal schema modification: Column "target" is the target column and cannot be removed.',
- ),
- (
- TimeSeries._from_table(
- Table({"time": [0, 1, 2], "feat": [1, 2, 3], "non_feat": [1, 2, 3], "target": [4, 5, 6]}),
- "target",
- "time",
- ["feat"],
- ),
- ["time"],
- ColumnIsTimeError,
- r'Illegal schema modification: Column "time" is the time column and cannot be removed.',
- ),
- ],
- ids=[
- "remove_only_target",
- "remove_non_feat_and_target",
- "remove_time_column",
- ],
-)
-def test_should_raise_in_remove_columns(
- table: TimeSeries,
- columns: list[str],
- error: type[Exception],
- error_msg: str,
-) -> None:
- with pytest.raises(error, match=error_msg):
- table.remove_columns(columns)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_remove_columns_with_missing_values.py b/tests/safeds/data/tabular/containers/_time_series/test_remove_columns_with_missing_values.py
deleted file mode 100644
index 319e27c5f..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_remove_columns_with_missing_values.py
+++ /dev/null
@@ -1,189 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import TimeSeries
-from safeds.exceptions import ColumnIsTargetError, ColumnIsTimeError
-
-from tests.helpers import assert_that_time_series_are_equal
-
-
-@pytest.mark.parametrize(
- ("table", "expected"),
- [
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_complete": [0, 1, 2],
- "feature_incomplete": [3, None, 5],
- "non_feature_complete": [7, 8, 9],
- "target": [3, 4, 5],
- },
- "target",
- "time",
- ["feature_complete", "feature_incomplete"],
- ),
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_complete": [0, 1, 2],
- "non_feature_complete": [7, 8, 9],
- "target": [3, 4, 5],
- },
- "target",
- "time",
- ["feature_complete"],
- ),
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_complete": [0, 1, 2],
- "non_feature_complete": [7, 8, 9],
- "non_feature_incomplete": [3, None, 5],
- "target": [3, 4, 5],
- },
- "target",
- "time",
- ["feature_complete"],
- ),
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_complete": [0, 1, 2],
- "non_feature_complete": [7, 8, 9],
- "target": [3, 4, 5],
- },
- "target",
- "time",
- ["feature_complete"],
- ),
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_complete": [0, 1, 2],
- "non_feature_complete": [7, 8, 9],
- "target": [3, 4, 5],
- },
- "target",
- "time",
- ["feature_complete"],
- ),
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_complete": [0, 1, 2],
- "non_feature_complete": [7, 8, 9],
- "target": [3, 4, 5],
- },
- "target",
- "time",
- ["feature_complete"],
- ),
- ),
- ],
- ids=["incomplete_feature", "incomplete_non_feature", "all_complete"],
-)
-def test_should_remove_columns_with_non_numerical_values(table: TimeSeries, expected: TimeSeries) -> None:
- new_table = table.remove_columns_with_missing_values()
- assert_that_time_series_are_equal(new_table, expected)
-
-
-@pytest.mark.parametrize(
- ("table", "error", "error_msg"),
- [
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature": [0, 1, 2],
- "non_feature": [1, 2, 3],
- "target": [3, None, 5],
- },
- "target",
- "time",
- ["feature"],
- ),
- ColumnIsTargetError,
- 'Illegal schema modification: Column "target" is the target column and cannot be removed.',
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature": [0, None, 2],
- "non_feature": [1, 2, 3],
- "target": [None, 4, 5],
- },
- "target",
- "time",
- ["feature"],
- ),
- ColumnIsTargetError,
- 'Illegal schema modification: Column "target" is the target column and cannot be removed.',
- ),
- (
- TimeSeries(
- {
- "time": [0, None, 2],
- "feature": [0, 1, 2],
- "non_feature": [1, 2, 3],
- "target": [3, 4, 5],
- },
- "target",
- "time",
- ["feature"],
- ),
- ColumnIsTimeError,
- 'Illegal schema modification: Column "time" is the time column and cannot be removed.',
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature": [0, 1, 2],
- "non_feature": [1, 2, 3],
- "target": [3, 4, None],
- },
- "target",
- "time",
- ["feature"],
- ),
- ColumnIsTargetError,
- 'Illegal schema modification: Column "target" is the target column and cannot be removed.',
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature": [0, None, 2],
- "non_feature": [1, None, 3],
- "target": [3, None, 5],
- },
- "target",
- "time",
- ["feature"],
- ),
- ColumnIsTargetError,
- 'Illegal schema modification: Column "target" is the target column and cannot be removed.',
- ),
- ],
- ids=[
- "only_target_incomplete",
- "also_feature_incomplete",
- "time_is_incomplete",
- "also_non_feature_incomplete",
- "all_incomplete",
- ],
-)
-def test_should_raise_in_remove_columns_with_missing_values(
- table: TimeSeries,
- error: type[Exception],
- error_msg: str,
-) -> None:
- with pytest.raises(
- error,
- match=error_msg,
- ):
- table.remove_columns_with_missing_values()
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_remove_columns_with_non_numerical_values.py b/tests/safeds/data/tabular/containers/_time_series/test_remove_columns_with_non_numerical_values.py
deleted file mode 100644
index 03d6e8572..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_remove_columns_with_non_numerical_values.py
+++ /dev/null
@@ -1,186 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import TimeSeries
-from safeds.exceptions import ColumnIsTargetError, ColumnIsTimeError
-
-from tests.helpers import assert_that_time_series_are_equal
-
-
-@pytest.mark.parametrize(
- ("table", "expected"),
- [
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_numerical": [0, 1, 2],
- "feature_non_numerical": ["a", "b", "c"],
- "non_feature_numerical": [7, 8, 9],
- "target": [3, 4, 5],
- },
- "target",
- "time",
- ["feature_numerical", "feature_non_numerical"],
- ),
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_numerical": [0, 1, 2],
- "non_feature_numerical": [7, 8, 9],
- "target": [3, 4, 5],
- },
- "target",
- "time",
- ["feature_numerical"],
- ),
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_numerical": [0, 1, 2],
- "non_feature_numerical": [7, 8, 9],
- "non_feature_non_numerical": ["a", "b", "c"],
- "target": [3, 4, 5],
- },
- "target",
- "time",
- ["feature_numerical"],
- ),
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_numerical": [0, 1, 2],
- "non_feature_numerical": [7, 8, 9],
- "target": [3, 4, 5],
- },
- "target",
- "time",
- ["feature_numerical"],
- ),
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_numerical": [0, 1, 2],
- "non_feature_numerical": [7, 8, 9],
- "target": [3, 4, 5],
- },
- "target",
- "time",
- ["feature_numerical"],
- ),
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_numerical": [0, 1, 2],
- "non_feature_numerical": [7, 8, 9],
- "target": [3, 4, 5],
- },
- "target",
- "time",
- ["feature_numerical"],
- ),
- ),
- ],
- ids=["non_numerical_feature", "non_numerical_non_feature", "all_numerical"],
-)
-def test_should_remove_columns_with_non_numerical_values(table: TimeSeries, expected: TimeSeries) -> None:
- new_table = table.remove_columns_with_non_numerical_values()
- assert_that_time_series_are_equal(new_table, expected)
-
-
-@pytest.mark.parametrize(
- ("table", "error", "error_msg"),
- [
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature": [0, 1, 2],
- "non_feature": [1, 2, 3],
- "target": ["a", "b", "c"],
- },
- "target",
- "time",
- ["feature"],
- ),
- ColumnIsTargetError,
- r'Illegal schema modification: Column "target" is the target column and cannot be removed.',
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature": [0, "x", 2],
- "non_feature": [1, 2, 3],
- "target": ["a", "b", "c"],
- },
- "target",
- "time",
- ["feature"],
- ),
- ColumnIsTargetError,
- r'Illegal schema modification: Column "target" is the target column and cannot be removed.',
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature": [0, 1, 2],
- "non_feature": [1, "x", 3],
- "target": ["a", "b", "c"],
- },
- "target",
- "time",
- ["feature"],
- ),
- ColumnIsTargetError,
- r'Illegal schema modification: Column "target" is the target column and cannot be removed.',
- ),
- (
- TimeSeries(
- {
- "time": ["!", "x", "2"],
- "feature": [0, 1, 2],
- "non_feature": [1, "x", 3],
- "target": [1, 2, 3],
- },
- "target",
- "time",
- ["feature"],
- ),
- ColumnIsTimeError,
- r'Illegal schema modification: Column "time" is the time column and cannot be removed.',
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature": [0, "x", 2],
- "non_feature": [1, "x", 3],
- "target": ["a", "b", "c"],
- },
- "target",
- "time",
- ["feature"],
- ),
- ColumnIsTargetError,
- r'Illegal schema modification: Column "target" is the target column and cannot be removed.',
- ),
- ],
- ids=[
- "only_target_non_numerical",
- "also_feature_non_numerical",
- "also_non_feature_non_numerical",
- "time_non_numerical",
- "all_non_numerical",
- ],
-)
-def test_should_raise_in_remove_columns_with_non_numerical_values(
- table: TimeSeries,
- error: type[Exception],
- error_msg: str,
-) -> None:
- with pytest.raises(error, match=error_msg):
- table.remove_columns_with_non_numerical_values()
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_remove_duplicate_rows.py b/tests/safeds/data/tabular/containers/_time_series/test_remove_duplicate_rows.py
deleted file mode 100644
index a4e0a3426..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_remove_duplicate_rows.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import TimeSeries
-
-from tests.helpers import assert_that_time_series_are_equal
-
-
-@pytest.mark.parametrize(
- ("table", "expected"),
- [
- (
- TimeSeries(
- {
- "time": [0, 0, 1],
- "feature": [0, 0, 1],
- "target": [2, 2, 3],
- },
- "target",
- "time",
- ),
- TimeSeries(
- {
- "time": [0, 1],
- "feature": [0, 1],
- "target": [2, 3],
- },
- "target",
- "time",
- ),
- ),
- (
- TimeSeries(
- {
- "time": [0, 0, 1],
- "feature": [0, 1, 2],
- "target": [2, 2, 3],
- },
- "target",
- "time",
- ),
- TimeSeries(
- {
- "time": [0, 0, 1],
- "feature": [0, 1, 2],
- "target": [2, 2, 3],
- },
- "target",
- "time",
- ),
- ),
- ],
- ids=["with_duplicate_rows", "without_duplicate_rows"],
-)
-def test_should_remove_duplicate_rows(table: TimeSeries, expected: TimeSeries) -> None:
- new_table = table.remove_duplicate_rows()
- assert_that_time_series_are_equal(new_table, expected)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_remove_rows_with_missing_values.py b/tests/safeds/data/tabular/containers/_time_series/test_remove_rows_with_missing_values.py
deleted file mode 100644
index 078151ac9..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_remove_rows_with_missing_values.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import TimeSeries
-
-from tests.helpers import assert_that_time_series_are_equal
-
-
-@pytest.mark.parametrize(
- ("table", "expected"),
- [
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature": [0.0, None, 2.0],
- "target": [3.0, 4.0, 5.0],
- },
- "target",
- "time",
- ),
- TimeSeries(
- {
- "time": [0, 2],
- "feature": [0.0, 2.0],
- "target": [3.0, 5.0],
- },
- "target",
- "time",
- ),
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature": [0.0, 1.0, 2.0],
- "target": [3.0, 4.0, 5.0],
- },
- "target",
- "time",
- ),
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature": [0.0, 1.0, 2.0],
- "target": [3.0, 4.0, 5.0],
- },
- "target",
- "time",
- ),
- ),
- ],
- ids=["with_missing_values", "without_missing_values"],
-)
-def test_should_remove_rows_with_missing_values(table: TimeSeries, expected: TimeSeries) -> None:
- new_table = table.remove_rows_with_missing_values()
- assert_that_time_series_are_equal(new_table, expected)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_remove_rows_with_outliers.py b/tests/safeds/data/tabular/containers/_time_series/test_remove_rows_with_outliers.py
deleted file mode 100644
index 8d206c65c..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_remove_rows_with_outliers.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import TimeSeries
-
-from tests.helpers import assert_that_time_series_are_equal
-
-
-@pytest.mark.parametrize(
- ("table", "expected"),
- [
- (
- TimeSeries(
- {
- "time": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
- "feature": [1.0, 11.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
- "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
- },
- "target",
- "time",
- ),
- TimeSeries(
- {
- "time": [0, 2, 3, 4, 5, 6, 7, 8, 9],
- "feature": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
- "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
- },
- "target",
- "time",
- ),
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
- "feature": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
- "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
- },
- "target",
- "time",
- ),
- TimeSeries(
- {
- "time": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
- "feature": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
- "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
- },
- "target",
- "time",
- ),
- ),
- ],
- ids=["with_outliers", "no_outliers"],
-)
-def test_should_remove_rows_with_outliers(table: TimeSeries, expected: TimeSeries) -> None:
- new_table = table.remove_rows_with_outliers()
- assert_that_time_series_are_equal(new_table, expected)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_rename_column.py b/tests/safeds/data/tabular/containers/_time_series/test_rename_column.py
deleted file mode 100644
index a0214b4ab..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_rename_column.py
+++ /dev/null
@@ -1,124 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import TimeSeries
-
-from tests.helpers import assert_that_time_series_are_equal
-
-
-@pytest.mark.parametrize(
- ("original_table", "old_column_name", "new_column_name", "result_table"),
- [
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_old": [0, 1, 2],
- "no_feature": [2, 3, 4],
- "target": [3, 4, 5],
- },
- target_name="target",
- time_name="time",
- feature_names=["feature_old"],
- ),
- "feature_old",
- "feature_new",
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_new": [0, 1, 2],
- "no_feature": [2, 3, 4],
- "target": [3, 4, 5],
- },
- target_name="target",
- time_name="time",
- feature_names=["feature_new"],
- ),
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature": [0, 1, 2],
- "no_feature": [2, 3, 4],
- "target_old": [3, 4, 5],
- },
- target_name="target_old",
- time_name="time",
- feature_names=["feature"],
- ),
- "target_old",
- "target_new",
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature": [0, 1, 2],
- "no_feature": [2, 3, 4],
- "target_new": [3, 4, 5],
- },
- target_name="target_new",
- time_name="time",
- feature_names=["feature"],
- ),
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature": [0, 1, 2],
- "no_feature_old": [2, 3, 4],
- "target": [3, 4, 5],
- },
- target_name="target",
- time_name="time",
- feature_names=["feature"],
- ),
- "no_feature_old",
- "no_feature_new",
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature": [0, 1, 2],
- "no_feature_new": [2, 3, 4],
- "target": [3, 4, 5],
- },
- target_name="target",
- time_name="time",
- feature_names=["feature"],
- ),
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature": [0, 1, 2],
- "no_feature_old": [2, 3, 4],
- "target": [3, 4, 5],
- },
- target_name="target",
- time_name="time",
- feature_names=["feature"],
- ),
- "time",
- "new_time",
- TimeSeries(
- {
- "new_time": [0, 1, 2],
- "feature": [0, 1, 2],
- "no_feature_old": [2, 3, 4],
- "target": [3, 4, 5],
- },
- target_name="target",
- time_name="new_time",
- feature_names=["feature"],
- ),
- ),
- ],
- ids=["rename_feature_column", "rename_target_column", "rename_non_feature_column", "rename_time_column"],
-)
-def test_should_rename_column(
- original_table: TimeSeries,
- old_column_name: str,
- new_column_name: str,
- result_table: TimeSeries,
-) -> None:
- new_table = original_table.rename_column(old_column_name, new_column_name)
- assert_that_time_series_are_equal(new_table, result_table)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_replace_column.py b/tests/safeds/data/tabular/containers/_time_series/test_replace_column.py
deleted file mode 100644
index 818f6580a..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_replace_column.py
+++ /dev/null
@@ -1,248 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import Column, TimeSeries
-from safeds.exceptions import IllegalSchemaModificationError
-
-from tests.helpers import assert_that_time_series_are_equal
-
-
-@pytest.mark.parametrize(
- ("original_table", "new_columns", "column_name_to_be_replaced", "result_table"),
- [
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_old": [0, 1, 2],
- "no_feature_old": [2, 3, 4],
- "target_old": [3, 4, 5],
- },
- "target_old",
- "time",
- ["feature_old"],
- ),
- [Column("feature_new", [2, 1, 0])],
- "feature_old",
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_new": [2, 1, 0],
- "no_feature_old": [2, 3, 4],
- "target_old": [3, 4, 5],
- },
- "target_old",
- "time",
- ["feature_new"],
- ),
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_old": [0, 1, 2],
- "no_feature_old": [2, 3, 4],
- "target_old": [3, 4, 5],
- },
- "target_old",
- "time",
- ["feature_old"],
- ),
- [Column("feature_new_a", [2, 1, 0]), Column("feature_new_b", [4, 2, 0])],
- "feature_old",
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_new_a": [2, 1, 0],
- "feature_new_b": [4, 2, 0],
- "no_feature_old": [2, 3, 4],
- "target_old": [3, 4, 5],
- },
- "target_old",
- "time",
- ["feature_new_a", "feature_new_b"],
- ),
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_old": [0, 1, 2],
- "no_feature_old": [2, 3, 4],
- "target_old": [3, 4, 5],
- },
- "target_old",
- "time",
- ["feature_old"],
- ),
- [Column("no_feature_new", [2, 1, 0])],
- "no_feature_old",
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_old": [0, 1, 2],
- "no_feature_new": [2, 1, 0],
- "target_old": [3, 4, 5],
- },
- "target_old",
- "time",
- ["feature_old"],
- ),
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_old": [0, 1, 2],
- "no_feature_old": [2, 3, 4],
- "target_old": [3, 4, 5],
- },
- "target_old",
- "time",
- ["feature_old"],
- ),
- [Column("no_feature_new_a", [2, 1, 0]), Column("no_feature_new_b", [4, 2, 0])],
- "no_feature_old",
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_old": [0, 1, 2],
- "no_feature_new_a": [2, 1, 0],
- "no_feature_new_b": [4, 2, 0],
- "target_old": [3, 4, 5],
- },
- "target_old",
- "time",
- ["feature_old"],
- ),
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_old": [0, 1, 2],
- "no_feature_old": [2, 3, 4],
- "target_old": [3, 4, 5],
- },
- "target_old",
- "time",
- ["feature_old"],
- ),
- [Column("target_new", [2, 1, 0])],
- "target_old",
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_old": [0, 1, 2],
- "no_feature_old": [2, 3, 4],
- "target_new": [2, 1, 0],
- },
- "target_new",
- "time",
- ["feature_old"],
- ),
- ),
- (
- TimeSeries(
- {
- "time_old": [0, 1, 2],
- "feature_old": [0, 1, 2],
- "no_feature_old": [2, 3, 4],
- "target_old": [3, 4, 5],
- },
- "target_old",
- "time_old",
- ["feature_old"],
- ),
- [Column("time_new", [1, 2, 3])],
- "time_old",
- TimeSeries(
- {
- "time_new": [1, 2, 3],
- "feature_old": [0, 1, 2],
- "no_feature_old": [2, 3, 4],
- "target_old": [3, 4, 5],
- },
- "target_old",
- "time_new",
- ["feature_old"],
- ),
- ),
- ],
- ids=[
- "replace_feature_column_with_one",
- "replace_feature_column_with_multiple",
- "replace_non_feature_column_with_one",
- "replace_non_feature_column_with_multiple",
- "replace_target_column",
- "replace_time_column",
- ],
-)
-def test_should_replace_column(
- original_table: TimeSeries,
- new_columns: list[Column],
- column_name_to_be_replaced: str,
- result_table: TimeSeries,
-) -> None:
- new_table = original_table.replace_column(column_name_to_be_replaced, new_columns)
- assert_that_time_series_are_equal(new_table, result_table)
-
-
-@pytest.mark.parametrize(
- ("original_table", "new_columns", "column_name_to_be_replaced", "error"),
- [
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_old": [0, 1, 2],
- "target_old": [3, 4, 5],
- },
- "target_old",
- "time",
- ),
- [],
- "target_old",
- 'Target column "target_old" can only be replaced by exactly one new column.',
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_old": [0, 1, 2],
- "target_old": [3, 4, 5],
- },
- "target_old",
- "time",
- ),
- [Column("target_new_a", [2, 1, 0]), Column("target_new_b"), [4, 2, 0]],
- "target_old",
- 'Target column "target_old" can only be replaced by exactly one new column.',
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "feature_old": [0, 1, 2],
- "target_old": [3, 4, 5],
- },
- "target_old",
- "time",
- ),
- [Column("target_new_a", [2, 1, 0]), Column("target_new_b"), [4, 2, 0]],
- "time",
- 'Time column "time" can only be replaced by exactly one new column.',
- ),
- ],
- ids=["zero_columns", "multiple_columns", "time_column"],
-)
-# here should be tested with time column as well but the test is weird to be extended
-def test_should_throw_illegal_schema_modification(
- original_table: TimeSeries,
- new_columns: list[Column],
- column_name_to_be_replaced: str,
- error: str,
-) -> None:
- with pytest.raises(
- IllegalSchemaModificationError,
- match=error,
- ):
- original_table.replace_column(column_name_to_be_replaced, new_columns)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_slice_rows.py b/tests/safeds/data/tabular/containers/_time_series/test_slice_rows.py
deleted file mode 100644
index e8788e52d..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_slice_rows.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import pytest
-from _pytest.python_api import raises
-from safeds.data.tabular.containers import TimeSeries
-from safeds.exceptions import IndexOutOfBoundsError
-
-from tests.helpers import assert_that_time_series_are_equal
-
-
-@pytest.mark.parametrize(
- ("table", "test_table", "second_test_table"),
- [
- (
- TimeSeries(
- data={"time": [0, 1, 2], "feature": [1, 2, 1], "non_feature": [0, 2, 4], "target": [1, 2, 4]},
- target_name="target",
- time_name="time",
- feature_names=["non_feature"],
- ),
- TimeSeries(
- data={"time": [0, 1], "feature": [1, 2], "non_feature": [0, 2], "target": [1, 2]},
- target_name="target",
- time_name="time",
- feature_names=["non_feature"],
- ),
- TimeSeries(
- {"time": [0, 2], "feature": [1, 1], "non_feature": [0, 4], "target": [1, 4]},
- target_name="target",
- time_name="time",
- feature_names=["non_feature"],
- ),
- ),
- ],
- ids=["Table with three rows"],
-)
-def test_should_slice_rows(table: TimeSeries, test_table: TimeSeries, second_test_table: TimeSeries) -> None:
- new_table = table.slice_rows(0, 2, 1)
- second_new_table = table.slice_rows(0, 3, 2)
- third_new_table = table.slice_rows()
- assert_that_time_series_are_equal(new_table, test_table)
- assert_that_time_series_are_equal(second_new_table, second_test_table)
- assert_that_time_series_are_equal(third_new_table, table)
-
-
-@pytest.mark.parametrize(
- ("start", "end", "step", "error_message"),
- [
- (3, 2, 1, r"There is no element in the range \[3, 2\]"),
- (4, 0, 1, r"There is no element in the range \[4, 0\]"),
- (0, 4, 1, r"There is no element at index '4'"),
- (-4, 0, 1, r"There is no element at index '-4'"),
- (0, -4, 1, r"There is no element in the range \[0, -4\]"),
- ],
-)
-def test_should_raise_if_index_out_of_bounds(start: int, end: int, step: int, error_message: str) -> None:
- table = TimeSeries({"time": [0, 1, 2], "feature": [1, 2, 1], "target": [1, 2, 4]}, "target", "time")
-
- with raises(IndexOutOfBoundsError, match=error_message):
- table.slice_rows(start, end, step)
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_sort_columns.py b/tests/safeds/data/tabular/containers/_time_series/test_sort_columns.py
deleted file mode 100644
index 679816069..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_sort_columns.py
+++ /dev/null
@@ -1,62 +0,0 @@
-from collections.abc import Callable
-
-import pytest
-from safeds.data.tabular.containers import Column, TimeSeries
-
-
-@pytest.mark.parametrize(
- ("query", "col1", "col2", "col3", "col4", "col5"),
- [
- (None, 0, 1, 2, 3, 4),
- (
- lambda col1, col2: (col1.name < col2.name) - (col1.name > col2.name),
- 4,
- 3,
- 2,
- 1,
- 0,
- ),
- ],
- ids=["no query", "with query"],
-)
-def test_should_return_sorted_table(
- query: Callable[[Column, Column], int],
- col1: int,
- col2: int,
- col3: int,
- col4: int,
- col5: int,
-) -> None:
- columns = [
- Column("col1", ["A", "B", "C", "A", "D"]),
- Column("col2", ["Test1", "Test1", "Test3", "Test1", "Test4"]),
- Column("col3", [1, 2, 3, 4, 5]),
- Column("col4", [2, 3, 1, 4, 6]),
- Column("time", [0, 1, 2, 3, 4]),
- ]
- table1 = TimeSeries(
- {
- "col2": ["Test1", "Test1", "Test3", "Test1", "Test4"],
- "col3": [1, 2, 3, 4, 5],
- "col4": [2, 3, 1, 4, 6],
- "col1": ["A", "B", "C", "A", "D"],
- "time": [0, 1, 2, 3, 4],
- },
- target_name="col1",
- time_name="time",
- feature_names=["col4", "col3"],
- )
- if query is not None:
- table_sorted = table1.sort_columns(query)
- else:
- table_sorted = table1.sort_columns()
- table_sorted_columns = table_sorted.to_columns()
- assert table_sorted.schema == table1.schema
- assert table_sorted_columns[0] == columns[col1]
- assert table_sorted_columns[1] == columns[col2]
- assert table_sorted_columns[2] == columns[col3]
- assert table_sorted_columns[3] == columns[col4]
- assert table_sorted_columns[4] == columns[col5]
- assert table_sorted._features == table1._features
- assert table_sorted._target == table1._target
- assert table_sorted.time == table1.time
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_split_rows.py b/tests/safeds/data/tabular/containers/_time_series/test_split_rows.py
deleted file mode 100644
index faee9fc23..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_split_rows.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import pandas as pd
-import pytest
-from safeds.data.tabular.containers import Table, TimeSeries
-from safeds.data.tabular.typing import Integer, Nothing, Schema
-
-
-@pytest.mark.parametrize(
- ("table", "result_train_table", "result_test_table", "percentage_in_first"),
- [
- (
- TimeSeries({"col1": [1, 2, 1], "col2": [1, 2, 4]}, time_name="col1", target_name="col2"),
- TimeSeries({"col1": [1, 2], "col2": [1, 2]}, time_name="col1", target_name="col2"),
- TimeSeries({"col1": [1], "col2": [4]}, time_name="col1", target_name="col2"),
- 2 / 3,
- ),
- (
- TimeSeries({"col1": [1, 2, 1], "col2": [1, 2, 4]}, time_name="col1", target_name="col2"),
- TimeSeries._from_table(
- Table._from_pandas_dataframe(pd.DataFrame(), Schema({"col1": Nothing(), "col2": Nothing()})),
- time_name="col1",
- target_name="col2",
- ),
- TimeSeries({"col1": [1, 2, 1], "col2": [1, 2, 4]}, time_name="col1", target_name="col2"),
- 0,
- ),
- (
- TimeSeries({"col1": [1, 2, 1], "col2": [1, 2, 4]}, time_name="col1", target_name="col2"),
- TimeSeries({"col1": [1, 2, 1], "col2": [1, 2, 4]}, time_name="col1", target_name="col2"),
- TimeSeries._from_table(
- Table._from_pandas_dataframe(pd.DataFrame(), Schema({"col1": Integer(), "col2": Integer()})),
- time_name="col1",
- target_name="col2",
- ),
- 1,
- ),
- ],
- ids=["2/3%", "0%", "100%"],
-)
-def test_should_split_table(
- table: TimeSeries,
- result_train_table: TimeSeries,
- result_test_table: TimeSeries,
- percentage_in_first: int,
-) -> None:
- train_table, test_table = table.split_rows(percentage_in_first)
- assert result_test_table == test_table
- assert result_train_table.schema == train_table.schema
- assert result_train_table == train_table
-
-
-@pytest.mark.parametrize(
- "percentage_in_first",
- [
- -1.0,
- 2.0,
- ],
- ids=["-100%", "200%"],
-)
-def test_should_raise_if_value_not_in_range(percentage_in_first: float) -> None:
- table = Table({"col1": [1, 2, 1], "col2": [1, 2, 4]})
-
- with pytest.raises(ValueError, match=r"The given percentage is not between 0 and 1"):
- table.split_rows(percentage_in_first)
-
-
-def test_should_split_empty_table() -> None:
- t1, t2 = Table().split_rows(0.4)
- assert t1.number_of_rows == 0
- assert t2.number_of_rows == 0
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_time.py b/tests/safeds/data/tabular/containers/_time_series/test_time.py
deleted file mode 100644
index f1a65de0f..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_time.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import Column, TimeSeries
-
-
-@pytest.mark.parametrize(
- ("time_series", "time"),
- [
- (
- TimeSeries(
- {
- "time": [0, 1],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- target_name="T",
- time_name="time",
- feature_names=["A", "B", "C"],
- ),
- Column("time", [0, 1]),
- ),
- (
- TimeSeries(
- {
- "time": [1, 2],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- target_name="T",
- time_name="time",
- feature_names=["A", "C"],
- ),
- Column("time", [1, 2]),
- ),
- ],
- ids=["only_target_and_features", "target_features_and_other"],
-)
-def test_should_return_features(time_series: TimeSeries, time: Column) -> None:
- assert time_series.time == time
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_time_target.py b/tests/safeds/data/tabular/containers/_time_series/test_time_target.py
deleted file mode 100644
index 31dc2b899..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_time_target.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import Column, TimeSeries
-
-# test
-
-
-@pytest.mark.parametrize(
- ("time_series", "target_column", "time_column"),
- [
- (
- TimeSeries(
- {
- "time": [0, 1],
- "A": [1, 4],
- "B": [2, 5],
- "C": [3, 6],
- "T": [0, 1],
- },
- target_name="T",
- time_name="time",
- ),
- Column("T", [0, 1]),
- Column("time", [0, 1]),
- ),
- ],
- ids=["target"],
-)
-def test_should_return_target(time_series: TimeSeries, target_column: Column, time_column: Column) -> None:
- assert time_series.target == target_column
- assert time_series.time == time_column
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_timeseries_from_csv_file.py b/tests/safeds/data/tabular/containers/_time_series/test_timeseries_from_csv_file.py
deleted file mode 100644
index 0c26e21ae..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_timeseries_from_csv_file.py
+++ /dev/null
@@ -1,62 +0,0 @@
-from pathlib import Path
-
-import pytest
-from safeds.data.tabular.containers import TimeSeries
-from safeds.exceptions import WrongFileExtensionError
-
-from tests.helpers import resolve_resource_path
-
-
-@pytest.mark.parametrize(
- ("path", "expected"),
- [
- (
- "table.csv",
- TimeSeries({"A": ["❔"], "B": [2]}, time_name="A", target_name="B"),
- ),
- (Path("table.csv"), TimeSeries({"A": ["❔"], "B": [2]}, time_name="A", target_name="B")),
- ],
- ids=["by String", "by path"],
-)
-def test_should_create_table_from_csv_file(path: str | Path, expected: TimeSeries) -> None:
- table = TimeSeries.timeseries_from_csv_file(resolve_resource_path(path), time_name="A", target_name="B")
- assert table.schema == expected.schema
- assert table == expected
-
-
-@pytest.mark.parametrize(
- ("path", "expected_error_message"),
- [
- ("test_table_from_csv_file_invalid.csv", r"test_table_from_csv_file_invalid.csv\" does not exist"),
- (Path("test_table_from_csv_file_invalid.csv"), r"test_table_from_csv_file_invalid.csv\" does not exist"),
- ],
- ids=["by String", "by path"],
-)
-def test_should_raise_error_if_file_not_found(path: str | Path, expected_error_message: str) -> None:
- with pytest.raises(FileNotFoundError, match=expected_error_message):
- TimeSeries.timeseries_from_csv_file(resolve_resource_path(path), time_name="A", target_name="B")
-
-
-@pytest.mark.parametrize(
- ("path", "expected_error_message"),
- [
- (
- "invalid_file_extension.file_extension",
- (
- r"invalid_file_extension.file_extension has a wrong file extension. Please provide a file with the"
- r" following extension\(s\): .csv"
- ),
- ),
- (
- Path("invalid_file_extension.file_extension"),
- (
- r"invalid_file_extension.file_extension has a wrong file extension. Please provide a file with the"
- r" following extension\(s\): .csv"
- ),
- ),
- ],
- ids=["by String", "by path"],
-)
-def test_should_raise_error_if_wrong_file_extension(path: str | Path, expected_error_message: str) -> None:
- with pytest.raises(WrongFileExtensionError, match=expected_error_message):
- TimeSeries.timeseries_from_csv_file(resolve_resource_path(path), time_name="A", target_name="B")
diff --git a/tests/safeds/data/tabular/containers/_time_series/test_transform_column.py b/tests/safeds/data/tabular/containers/_time_series/test_transform_column.py
deleted file mode 100644
index 176533570..000000000
--- a/tests/safeds/data/tabular/containers/_time_series/test_transform_column.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import pytest
-from safeds.data.tabular.containers import TimeSeries
-from safeds.exceptions import UnknownColumnNameError
-
-from tests.helpers import assert_that_time_series_are_equal
-
-
-# here is the time column transformable
-@pytest.mark.parametrize(
- ("table", "column_name", "table_transformed"),
- [
- (
- TimeSeries(
- {"time": [0, 1, 2], "feature_a": [1, 2, 3], "feature_b": [4, 5, 6], "target": [1, 2, 3]},
- "target",
- "time",
- ),
- "feature_a",
- TimeSeries(
- {"time": [0, 1, 2], "feature_a": [2, 4, 6], "feature_b": [4, 5, 6], "target": [1, 2, 3]},
- "target",
- "time",
- ),
- ),
- (
- TimeSeries(
- {"time": [0, 1, 2], "feature_a": [1, 2, 3], "feature_b": [4, 5, 6], "target": [1, 2, 3]},
- "target",
- "time",
- ),
- "target",
- TimeSeries(
- {"time": [0, 1, 2], "feature_a": [1, 2, 3], "feature_b": [4, 5, 6], "target": [2, 4, 6]},
- "target",
- "time",
- ),
- ),
- (
- TimeSeries(
- {"time": [0, 1, 2], "feature_a": [1, 2, 3], "b": [4, 5, 6], "target": [1, 2, 3]},
- target_name="target",
- time_name="time",
- feature_names=["feature_a"],
- ),
- "b",
- TimeSeries(
- {"time": [0, 1, 2], "feature_a": [1, 2, 3], "b": [8, 10, 12], "target": [1, 2, 3]},
- target_name="target",
- time_name="time",
- feature_names=["feature_a"],
- ),
- ),
- (
- TimeSeries(
- {"time": [0, 1, 2], "feature_a": [1, 2, 3], "b": [4, 5, 6], "target": [1, 2, 3]},
- target_name="target",
- time_name="time",
- feature_names=["feature_a"],
- ),
- "time",
- TimeSeries(
- {"time": [0, 2, 4], "feature_a": [1, 2, 3], "b": [4, 5, 6], "target": [1, 2, 3]},
- target_name="target",
- time_name="time",
- feature_names=["feature_a"],
- ),
- ),
- ],
- ids=[
- "transform_feature_column",
- "transform_target_column",
- "transform_column_that_is_neither",
- "transform_time_col",
- ],
-)
-def test_should_transform_column(table: TimeSeries, column_name: str, table_transformed: TimeSeries) -> None:
- result = table.transform_column(column_name, lambda row: row.get_value(column_name) * 2)
- assert_that_time_series_are_equal(result, table_transformed)
-
-
-@pytest.mark.parametrize(
- ("table", "column_name"),
- [
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "A": [1, 2, 3],
- "B": [4, 5, 6],
- "C": ["a", "b", "c"],
- },
- "C",
- "time",
- ),
- "D",
- ),
- (
- TimeSeries(
- {
- "time": [0, 1, 2],
- "A": [1, 2, 3],
- "B": [4, 5, 6],
- "C": ["a", "b", "c"],
- },
- target_name="C",
- time_name="time",
- feature_names=["A"],
- ),
- "D",
- ),
- ],
- ids=["has_only_features_and_target", "has_columns_that_are_neither"],
-)
-def test_should_raise_if_column_not_found(table: TimeSeries, column_name: str) -> None:
- with pytest.raises(UnknownColumnNameError, match=rf"Could not find column\(s\) '{column_name}'"):
- table.transform_column(column_name, lambda row: row.get_value("A") * 2)
diff --git a/tests/safeds/ml/classical/regression/test_arima_model.py b/tests/safeds/ml/classical/regression/test_arima_model.py
index b7a04e899..5a317e07a 100644
--- a/tests/safeds/ml/classical/regression/test_arima_model.py
+++ b/tests/safeds/ml/classical/regression/test_arima_model.py
@@ -1,13 +1,13 @@
from typing import Any
import pytest
-from safeds.data.tabular.containers import Table, TimeSeries
+from safeds.data.labeled.containers import TimeSeriesDataset
+from safeds.data.tabular.containers import Table
from safeds.exceptions import (
DatasetMissesDataError,
MissingValuesColumnError,
ModelNotFittedError,
NonNumericColumnError,
- NonTimeSeriesError,
)
from safeds.ml.classical.regression import ArimaModelRegressor, LassoRegressor
@@ -17,30 +17,27 @@
def test_arima_model() -> None:
# Create a DataFrame
_inflation_path = "_datas/US_Inflation_rates.csv"
- time_series = TimeSeries.timeseries_from_csv_file(
+ time_series = Table.from_csv_file(
path=resolve_resource_path(_inflation_path),
- target_name="value",
- time_name="date",
)
train_ts, test_ts = time_series.split_rows(0.8)
model = ArimaModelRegressor()
- trained_model = model.fit(train_ts)
- predicted_ts = trained_model.predict(test_ts)
- predicted_ts.plot_compare_time_series([test_ts])
+ trained_model = model.fit(train_ts.to_time_series_dataset("value", "date"))
+ trained_model.predict(test_ts.to_time_series_dataset("value", "date"))
# suggest it ran through
assert True
-def create_test_data() -> TimeSeries:
- return TimeSeries(
+def create_test_data() -> TimeSeriesDataset:
+ return TimeSeriesDataset(
{"time": [1, 2, 3, 4, 5, 6, 7, 8, 9], "value": [1, 2, 3, 4, 5, 6, 7, 8, 9]},
time_name="time",
target_name="value",
)
-def create_test_data_with_feature() -> TimeSeries:
- return TimeSeries(
+def create_test_data_with_feature() -> TimeSeriesDataset:
+ return TimeSeriesDataset(
{
"time": [1, 2, 3, 4, 5, 6, 7, 8, 9],
"value": [1, 2, 3, 4, 5, 6, 7, 8, 9],
@@ -92,7 +89,7 @@ def test_should_succeed_on_valid_data_plot() -> None:
"feat2": [3, 6],
"target": ["0", 1],
},
- ).time_columns(target_name="target", feature_names=["feat1", "feat2"], time_name="id"),
+ ).to_time_series_dataset(target_name="target", time_name="id"),
NonNumericColumnError,
r"Tried to do a numerical operation on one or multiple non-numerical columns: \ntarget",
),
@@ -104,7 +101,7 @@ def test_should_succeed_on_valid_data_plot() -> None:
"feat2": [3, 6],
"target": [None, 1],
},
- ).time_columns(target_name="target", feature_names=["feat1", "feat2"], time_name="id"),
+ ).to_time_series_dataset(target_name="target", time_name="id"),
MissingValuesColumnError,
r"Tried to do an operation on one or multiple columns containing missing values: \ntarget\nYou can use the Imputer to replace the missing values based on different strategies.\nIf you want toremove the missing values entirely you can use the method `TimeSeries.remove_rows_with_missing_values`.",
),
@@ -116,7 +113,7 @@ def test_should_succeed_on_valid_data_plot() -> None:
"feat2": [],
"target": [],
},
- ).time_columns(target_name="target", feature_names=["feat1", "feat2"], time_name="id"),
+ ).to_time_series_dataset(target_name="target", time_name="id"),
DatasetMissesDataError,
r"Dataset contains no rows",
),
@@ -124,7 +121,7 @@ def test_should_succeed_on_valid_data_plot() -> None:
ids=["non-numerical data", "missing values in data", "no rows in data"],
)
def test_should_raise_on_invalid_data(
- invalid_data: TimeSeries,
+ invalid_data: TimeSeriesDataset,
expected_error: Any,
expected_error_msg: str,
) -> None:
@@ -133,25 +130,6 @@ def test_should_raise_on_invalid_data(
model.fit(invalid_data)
-@pytest.mark.parametrize(
- "table",
- [
- Table(
- {
- "a": [1.0, 0.0, 0.0, 0.0],
- "b": [0.0, 1.0, 1.0, 0.0],
- "c": [0.0, 0.0, 0.0, 1.0],
- },
- ),
- ],
- ids=["table"],
-)
-def test_should_raise_if_given_normal_table(table: Table) -> None:
- model = ArimaModelRegressor()
- with pytest.raises(NonTimeSeriesError):
- model.fit(table) # type: ignore[arg-type]
-
-
def test_correct_structure_of_time_series_with_features() -> None:
data = create_test_data_with_feature()
model = ArimaModelRegressor()
diff --git a/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-1234-cpu].png b/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-1234-cpu].png
deleted file mode 100644
index c931271a1..000000000
Binary files a/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-1234-cpu].png and /dev/null differ
diff --git a/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-1234-cuda].png b/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-1234-cuda].png
deleted file mode 100644
index 4954d7600..000000000
Binary files a/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-1234-cuda].png and /dev/null differ
diff --git a/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-4711-cpu].png b/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-4711-cpu].png
deleted file mode 100644
index ea361e931..000000000
Binary files a/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-4711-cpu].png and /dev/null differ
diff --git a/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-4711-cuda].png b/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-4711-cuda].png
deleted file mode 100644
index 703799cbb..000000000
Binary files a/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-4711-cuda].png and /dev/null differ
diff --git a/tests/safeds/ml/nn/test_cnn_workflow.py b/tests/safeds/ml/nn/test_cnn_workflow.py
index 058ee9d78..164440b7e 100644
--- a/tests/safeds/ml/nn/test_cnn_workflow.py
+++ b/tests/safeds/ml/nn/test_cnn_workflow.py
@@ -32,30 +32,26 @@
class TestImageToTableClassifier:
@pytest.mark.parametrize(
- ("seed", "device", "layer_3_bias", "prediction_label"),
+ ("seed", "device", "prediction_label"),
[
(
1234,
device_cuda,
- [0.5809096097946167, -0.32418742775917053, 0.026058292016386986, 0.5801554918289185],
["grayscale"] * 7,
),
(
4711,
device_cuda,
- [-0.8114155530929565, -0.9443624019622803, 0.8557258248329163, -0.848240852355957],
["white_square"] * 7,
),
(
1234,
device_cpu,
- [-0.6926110982894897, 0.33004942536354065, -0.32962560653686523, 0.5768553614616394],
["grayscale"] * 7,
),
(
4711,
device_cpu,
- [-0.9051575660705566, -0.8625037670135498, 0.24682046473026276, -0.2612163722515106],
["white_square"] * 7,
),
],
@@ -64,7 +60,6 @@ class TestImageToTableClassifier:
def test_should_train_and_predict_model(
self,
seed: int,
- layer_3_bias: list[float],
prediction_label: list[str],
device: Device,
) -> None:
@@ -92,7 +87,12 @@ def test_should_train_and_predict_model(
)
nn = nn_original.fit(image_dataset, epoch_size=2)
assert str(nn_original._model.state_dict().values()) != str(nn._model.state_dict().values())
- assert nn._model.state_dict()["_pytorch_layers.3._layer.bias"].tolist() == layer_3_bias
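+        # Check that fitting changed the last layer's bias instead of comparing against hard-coded values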
+ assert not torch.all(
+ torch.eq(
+ nn_original._model.state_dict()["_pytorch_layers.3._layer.bias"],
+ nn._model.state_dict()["_pytorch_layers.3._layer.bias"],
+ )
+ ).item()
prediction: ImageDataset = nn.predict(image_dataset.get_input())
assert one_hot_encoder.inverse_transform(prediction.get_output()) == Table({"class": prediction_label})
@@ -100,30 +100,26 @@ def test_should_train_and_predict_model(
class TestImageToColumnClassifier:
@pytest.mark.parametrize(
- ("seed", "device", "layer_3_bias", "prediction_label"),
+ ("seed", "device", "prediction_label"),
[
(
1234,
device_cuda,
- [0.5805736780166626, -0.32432740926742554, 0.02629312314093113, 0.5803964138031006],
["grayscale"] * 7,
),
(
4711,
device_cuda,
- [-0.8114045262336731, -0.9443488717079163, 0.8557113409042358, -0.8482510447502136],
["white_square"] * 7,
),
(
1234,
device_cpu,
- [-0.69260174036026, 0.33002084493637085, -0.32964015007019043, 0.5768893957138062],
["grayscale"] * 7,
),
(
4711,
device_cpu,
- [-0.9051562547683716, -0.8625034093856812, 0.24682027101516724, -0.26121777296066284],
["white_square"] * 7,
),
],
@@ -132,7 +128,6 @@ class TestImageToColumnClassifier:
def test_should_train_and_predict_model(
self,
seed: int,
- layer_3_bias: list[float],
prediction_label: list[str],
device: Device,
) -> None:
@@ -159,7 +154,12 @@ def test_should_train_and_predict_model(
)
nn = nn_original.fit(image_dataset, epoch_size=2)
assert str(nn_original._model.state_dict().values()) != str(nn._model.state_dict().values())
- assert nn._model.state_dict()["_pytorch_layers.3._layer.bias"].tolist() == layer_3_bias
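+        # Check that fitting changed the last layer's bias instead of comparing against hard-coded values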
+ assert not torch.all(
+ torch.eq(
+ nn_original._model.state_dict()["_pytorch_layers.3._layer.bias"],
+ nn._model.state_dict()["_pytorch_layers.3._layer.bias"],
+ )
+ ).item()
prediction: ImageDataset = nn.predict(image_dataset.get_input())
assert prediction.get_output() == Column("class", prediction_label)
@@ -167,12 +167,12 @@ def test_should_train_and_predict_model(
class TestImageToImageRegressor:
@pytest.mark.parametrize(
- ("seed", "device", "layer_3_bias"),
+ ("seed", "device"),
[
- (1234, device_cuda, [0.13570494949817657, 0.02420804090797901, -0.1311846673488617, 0.22676928341388702]),
- (4711, device_cuda, [0.11234158277511597, 0.13972002267837524, -0.07925988733768463, 0.07342307269573212]),
- (1234, device_cpu, [-0.1637762188911438, 0.02012808807194233, -0.22295698523521423, 0.1689515858888626]),
- (4711, device_cpu, [-0.030541712418198586, -0.15364733338356018, 0.1741572618484497, 0.015837203711271286]),
+ (1234, device_cuda),
+ (4711, device_cuda),
+ (1234, device_cpu),
+ (4711, device_cpu),
],
ids=["seed-1234-cuda", "seed-4711-cuda", "seed-1234-cpu", "seed-4711-cpu"],
)
@@ -180,7 +180,6 @@ def test_should_train_and_predict_model(
self,
seed: int,
snapshot_png_image_list: SnapshotAssertion,
- layer_3_bias: list[float],
device: Device,
) -> None:
skip_if_device_not_available(device)
@@ -205,6 +204,11 @@ def test_should_train_and_predict_model(
)
nn = nn_original.fit(image_dataset, epoch_size=20)
assert str(nn_original._model.state_dict().values()) != str(nn._model.state_dict().values())
- assert nn._model.state_dict()["_pytorch_layers.3._layer.bias"].tolist() == layer_3_bias
- prediction: ImageDataset = nn.predict(image_dataset.get_input())
- assert prediction.get_output() == snapshot_png_image_list
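+        # Check that fitting changed the last layer's bias instead of comparing against hard-coded values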
+ assert not torch.all(
+ torch.eq(
+ nn_original._model.state_dict()["_pytorch_layers.3._layer.bias"],
+ nn._model.state_dict()["_pytorch_layers.3._layer.bias"],
+ )
+ ).item()
+ prediction = nn.predict(image_dataset.get_input())
+ assert isinstance(prediction.get_output(), ImageList)
diff --git a/tests/safeds/ml/nn/test_forward_workflow.py b/tests/safeds/ml/nn/test_forward_workflow.py
new file mode 100644
index 000000000..87a282383
--- /dev/null
+++ b/tests/safeds/ml/nn/test_forward_workflow.py
@@ -0,0 +1,35 @@
+from safeds.data.tabular.containers import Table
+from safeds.data.tabular.transformation import StandardScaler
+from safeds.ml.nn import (
+ ForwardLayer,
+ InputConversionTable,
+ NeuralNetworkRegressor,
+ OutputConversionTable,
+)
+
+from tests.helpers import resolve_resource_path
+
+
+def test_forward_model() -> None:
+    # Load the US inflation rates dataset as a Table
+ _inflation_path = "_datas/US_Inflation_rates.csv"
+ table_1 = Table.from_csv_file(
+ path=resolve_resource_path(_inflation_path),
+ )
+ table_1 = table_1.remove_columns(["date"])
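+    # Shift the value column by 14 rows to create a simple look-ahead target for the regressor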
+ table_2 = Table.from_rows(table_1.to_rows()[:-14])
+ table_2 = table_2.add_columns([Table.from_rows(table_1.to_rows()[14:]).get_column("value").rename("target")])
+ train_table, test_table = table_2.split_rows(0.8)
+
+ ss = StandardScaler()
+ _, train_table = ss.fit_and_transform(train_table, ["value"])
+ _, test_table = ss.fit_and_transform(test_table, ["value"])
+ model = NeuralNetworkRegressor(
+ InputConversionTable(),
+ [ForwardLayer(input_size=1, output_size=1)],
+ OutputConversionTable("predicted"),
+ )
+
+ fitted_model = model.fit(train_table.to_tabular_dataset("target"), epoch_size=1, learning_rate=0.01)
+ fitted_model.predict(test_table.keep_only_columns(["value"]))
+ assert True
diff --git a/tests/safeds/ml/nn/test_input_conversion_time_series.py b/tests/safeds/ml/nn/test_input_conversion_time_series.py
new file mode 100644
index 000000000..c40c0b941
--- /dev/null
+++ b/tests/safeds/ml/nn/test_input_conversion_time_series.py
@@ -0,0 +1,30 @@
+from safeds.data.tabular.containers import Table
+from safeds.ml.nn import (
+ InputConversionTimeSeries,
+ LSTMLayer,
+ NeuralNetworkRegressor,
+ OutputConversionTimeSeries,
+)
+
+
+def test_should_raise_if_is_fitted_is_set_correctly_lstm() -> None:
+ model = NeuralNetworkRegressor(
+ InputConversionTimeSeries(1, 1),
+ [LSTMLayer(input_size=2, output_size=1)],
+ OutputConversionTimeSeries("predicted"),
+ )
+ ts = Table.from_dict({"target": [1, 1, 1, 1], "time": [0, 0, 0, 0], "feat": [0, 0, 0, 0]}).to_time_series_dataset(
+ "target",
+ "time",
+ )
+ assert not model.is_fitted
+ model = model.fit(ts)
+ model.predict(ts)
+ assert model.is_fitted
+
+
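+# The conversion should report the window size and forecast horizon it was configured with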
+def test_get_output_config() -> None:
+ test_val = {"window_size": 1, "forecast_horizon": 1}
+ it = InputConversionTimeSeries(1, 1)
+ di = it._get_output_configuration()
+ assert di == test_val
diff --git a/tests/safeds/ml/nn/test_lstm_layer.py b/tests/safeds/ml/nn/test_lstm_layer.py
new file mode 100644
index 000000000..e876da4e1
--- /dev/null
+++ b/tests/safeds/ml/nn/test_lstm_layer.py
@@ -0,0 +1,192 @@
+import sys
+from typing import Any
+
+import pytest
+from safeds.data.image.typing import ImageSize
+from safeds.exceptions import OutOfBoundsError
+from safeds.ml.nn import LSTMLayer
+from torch import nn
+
+
+@pytest.mark.parametrize(
+ "input_size",
+ [
+ 0,
+ ],
+ ids=["input_size_out_of_bounds"],
+)
+def test_should_raise_if_input_size_out_of_bounds(input_size: int) -> None:
+ with pytest.raises(
+ OutOfBoundsError,
+ match=rf"input_size \(={input_size}\) is not inside \[1, \u221e\)\.",
+ ):
+ LSTMLayer(output_size=1, input_size=input_size)
+
+
+@pytest.mark.parametrize(
+ "input_size",
+ [
+ 1,
+ 20,
+ ],
+ ids=["one", "twenty"],
+)
+def test_should_raise_if_input_size_doesnt_match(input_size: int) -> None:
+    assert LSTMLayer(output_size=1, input_size=input_size).input_size == input_size
+
+
+@pytest.mark.parametrize(
+ ("activation_function", "expected_activation_function"),
+ [
+ ("sigmoid", nn.Sigmoid),
+ ("relu", nn.ReLU),
+ ("softmax", nn.Softmax),
+ ("none", None),
+ ],
+ ids=["sigmoid", "relu", "softmax", "none"],
+)
+def test_should_accept_activation_function(activation_function: str, expected_activation_function: type | None) -> None:
+ forward_layer = LSTMLayer(output_size=1, input_size=1)._get_internal_layer(
+ activation_function=activation_function,
+ )
+ assert (
+ forward_layer._fn is None
+ if expected_activation_function is None
+ else isinstance(forward_layer._fn, expected_activation_function)
+ )
+
+
+@pytest.mark.parametrize(
+ "activation_function",
+ [
+ "unknown_string",
+ ],
+ ids=["unknown"],
+)
+def test_should_raise_if_unknown_activation_function_is_passed(activation_function: str) -> None:
+ with pytest.raises(
+ ValueError,
+ match=rf"Unknown Activation Function: {activation_function}",
+ ):
+ LSTMLayer(output_size=1, input_size=1)._get_internal_layer(activation_function=activation_function)
+
+
+@pytest.mark.parametrize(
+ "output_size",
+ [
+ 0,
+ ],
+ ids=["output_size_out_of_bounds"],
+)
+def test_should_raise_if_output_size_out_of_bounds(output_size: int) -> None:
+ with pytest.raises(
+ OutOfBoundsError,
+ match=rf"output_size \(={output_size}\) is not inside \[1, \u221e\)\.",
+ ):
+ LSTMLayer(output_size=output_size, input_size=1)
+
+
+@pytest.mark.parametrize(
+ "output_size",
+ [
+ 1,
+ 20,
+ ],
+ ids=["one", "twenty"],
+)
+def test_should_raise_if_output_size_doesnt_match(output_size: int) -> None:
+ assert LSTMLayer(output_size=output_size, input_size=1).output_size == output_size
+
+
+def test_should_raise_if_input_size_is_set_with_image_size() -> None:
+ layer = LSTMLayer(1)
+ with pytest.raises(TypeError, match=r"The input_size of a forward layer has to be of type int."):
+ layer._set_input_size(ImageSize(1, 2, 3))
+
+
+def test_should_raise_if_activation_function_not_set() -> None:
+ layer = LSTMLayer(1)
+ with pytest.raises(
+ ValueError,
+ match=r"The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs.",
+ ):
+ layer._get_internal_layer()
+
+
+@pytest.mark.parametrize(
+ ("layer1", "layer2", "equal"),
+ [
+ (
+ LSTMLayer(input_size=1, output_size=2),
+ LSTMLayer(input_size=1, output_size=2),
+ True,
+ ),
+ (
+ LSTMLayer(input_size=1, output_size=2),
+ LSTMLayer(input_size=2, output_size=1),
+ False,
+ ),
+ ],
+ ids=["equal", "not equal"],
+)
+def test_should_compare_forward_layers(layer1: LSTMLayer, layer2: LSTMLayer, equal: bool) -> None:
+ assert (layer1.__eq__(layer2)) == equal
+
+
+def test_should_assert_that_forward_layer_is_equal_to_itself() -> None:
+ layer = LSTMLayer(input_size=1, output_size=1)
+ assert layer.__eq__(layer)
+
+
+@pytest.mark.parametrize(
+ ("layer", "other"),
+ [
+ (LSTMLayer(input_size=1, output_size=1), None),
+ ],
+ ids=["ForwardLayer vs. None"],
+)
+def test_should_return_not_implemented_if_other_is_not_forward_layer(layer: LSTMLayer, other: Any) -> None:
+ assert (layer.__eq__(other)) is NotImplemented
+
+
+@pytest.mark.parametrize(
+ ("layer1", "layer2"),
+ [
+ (
+ LSTMLayer(input_size=1, output_size=2),
+ LSTMLayer(input_size=1, output_size=2),
+ ),
+ ],
+ ids=["equal"],
+)
+def test_should_assert_that_equal_forward_layers_have_equal_hash(layer1: LSTMLayer, layer2: LSTMLayer) -> None:
+ assert layer1.__hash__() == layer2.__hash__()
+
+
+@pytest.mark.parametrize(
+ ("layer1", "layer2"),
+ [
+ (
+ LSTMLayer(input_size=1, output_size=2),
+ LSTMLayer(input_size=2, output_size=1),
+ ),
+ ],
+ ids=["not equal"],
+)
+def test_should_assert_that_different_forward_layers_have_different_hash(
+ layer1: LSTMLayer,
+ layer2: LSTMLayer,
+) -> None:
+ assert layer1.__hash__() != layer2.__hash__()
+
+
+@pytest.mark.parametrize(
+ "layer",
+ [
+ LSTMLayer(input_size=1, output_size=1),
+ ],
+ ids=["one"],
+)
+def test_should_assert_that_layer_size_is_greater_than_normal_object(layer: LSTMLayer) -> None:
+ assert sys.getsizeof(layer) > sys.getsizeof(object())
diff --git a/tests/safeds/ml/nn/test_lstm_workflow.py b/tests/safeds/ml/nn/test_lstm_workflow.py
new file mode 100644
index 000000000..33e3f1b49
--- /dev/null
+++ b/tests/safeds/ml/nn/test_lstm_workflow.py
@@ -0,0 +1,29 @@
+from safeds.data.tabular.containers import Table
+from safeds.data.tabular.transformation import RangeScaler
+from safeds.ml.nn import (
+ ForwardLayer,
+ InputConversionTimeSeries,
+ LSTMLayer,
+ NeuralNetworkRegressor,
+ OutputConversionTimeSeries,
+)
+
+from tests.helpers import resolve_resource_path
+
+
+def test_lstm_model() -> None:
+    # Load the US inflation rates dataset as a Table
+ _inflation_path = "_datas/US_Inflation_rates.csv"
+ table = Table.from_csv_file(path=resolve_resource_path(_inflation_path))
+ rs = RangeScaler()
+ _, table = rs.fit_and_transform(table, ["value"])
+ train_table, test_table = table.split_rows(0.8)
+
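+    # Forecast 12 steps ahead from windows of 7 values with a forward layer feeding an LSTM layer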
+ model = NeuralNetworkRegressor(
+ InputConversionTimeSeries(window_size=7, forecast_horizon=12),
+ [ForwardLayer(input_size=7, output_size=256), LSTMLayer(input_size=256, output_size=1)],
+ OutputConversionTimeSeries("predicted"),
+ )
+ trained_model = model.fit(train_table.to_time_series_dataset("value", "date"), epoch_size=1)
+
+ trained_model.predict(test_table.to_time_series_dataset("value", "date"))
diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py
index 3e03ad2fc..d4a72d492 100644
--- a/tests/safeds/ml/nn/test_model.py
+++ b/tests/safeds/ml/nn/test_model.py
@@ -18,6 +18,7 @@
InputConversion,
InputConversionImage,
InputConversionTable,
+ LSTMLayer,
Layer,
MaxPooling2DLayer,
NeuralNetworkClassifier,
@@ -44,7 +45,7 @@ def test_should_raise_if_epoch_size_out_of_bounds(self, epoch_size: int) -> None
match=rf"epoch_size \(={epoch_size}\) is not inside \[1, \u221e\)\.",
):
NeuralNetworkClassifier(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(1, 1)],
OutputConversionTable(),
).fit(
@@ -65,7 +66,7 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None
match=rf"batch_size \(={batch_size}\) is not inside \[1, \u221e\)\.",
):
NeuralNetworkClassifier(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1)],
OutputConversionTable(),
).fit(
@@ -75,7 +76,7 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None
def test_should_raise_if_fit_function_returns_wrong_datatype(self) -> None:
fitted_model = NeuralNetworkClassifier(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=1)],
OutputConversionTable(),
).fit(
@@ -93,7 +94,7 @@ def test_should_raise_if_fit_function_returns_wrong_datatype(self) -> None:
)
def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_size: int) -> None:
fitted_model = NeuralNetworkClassifier(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=1)],
OutputConversionTable(),
).fit(
@@ -116,20 +117,28 @@ def test_should_raise_if_predict_function_returns_wrong_datatype_for_multiclass_
batch_size: int,
) -> None:
fitted_model = NeuralNetworkClassifier(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=3)],
OutputConversionTable(),
).fit(
Table.from_dict({"a": [0, 1, 2], "b": [0, 15, 51]}).to_tabular_dataset("a"),
batch_size=batch_size,
)
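+ # Also fit a classifier whose last layer is an LSTMLayer so the recurrent code path is exercised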
+ NeuralNetworkClassifier(
+ InputConversionTable(),
+ [ForwardLayer(input_size=1, output_size=8), LSTMLayer(output_size=3)],
+ OutputConversionTable(),
+ ).fit(
+ Table.from_dict({"a": [0, 1, 2], "b": [0, 15, 51]}).to_tabular_dataset("a"),
+ batch_size=batch_size,
+ )
predictions = fitted_model.predict(Table.from_dict({"b": [1, 4, 124]}))
assert isinstance(predictions, TabularDataset)
def test_should_raise_if_model_has_not_been_fitted(self) -> None:
with pytest.raises(ModelNotFittedError, match="The model has not been fitted yet."):
NeuralNetworkClassifier(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1)],
OutputConversionTable(),
).predict(
@@ -138,31 +147,51 @@ def test_should_raise_if_model_has_not_been_fitted(self) -> None:
def test_should_raise_if_is_fitted_is_set_correctly_for_binary_classification(self) -> None:
model = NeuralNetworkClassifier(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1)],
OutputConversionTable(),
)
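+ # A second model with an LSTMLayer checks that is_fitted behaves the same for recurrent layers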
+ model_2 = NeuralNetworkClassifier(
+ InputConversionTable(),
+ [LSTMLayer(input_size=1, output_size=1)],
+ OutputConversionTable(),
+ )
assert not model.is_fitted
+ assert not model_2.is_fitted
model = model.fit(
Table.from_dict({"a": [1], "b": [0]}).to_tabular_dataset("a"),
)
+ model_2 = model_2.fit(
+ Table.from_dict({"a": [1], "b": [0]}).to_tabular_dataset("a"),
+ )
assert model.is_fitted
+ assert model_2.is_fitted
def test_should_raise_if_is_fitted_is_set_correctly_for_multiclass_classification(self) -> None:
model = NeuralNetworkClassifier(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)],
OutputConversionTable(),
)
+ model_2 = NeuralNetworkClassifier(
+ InputConversionTable(),
+ [ForwardLayer(input_size=1, output_size=1), LSTMLayer(output_size=3)],
+ OutputConversionTable(),
+ )
assert not model.is_fitted
+ assert not model_2.is_fitted
model = model.fit(
Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).to_tabular_dataset("a"),
)
+ model_2 = model_2.fit(
+ Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).to_tabular_dataset("a"),
+ )
assert model.is_fitted
+ assert model_2.is_fitted
def test_should_raise_if_test_features_mismatch(self) -> None:
model = NeuralNetworkClassifier(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)],
OutputConversionTable(),
)
@@ -179,21 +208,22 @@ def test_should_raise_if_test_features_mismatch(self) -> None:
def test_should_raise_if_train_features_mismatch(self) -> None:
model = NeuralNetworkClassifier(
- InputConversionTable(["b"], "a"),
- [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)],
+ InputConversionTable(),
+ [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=1)],
OutputConversionTable(),
)
with pytest.raises(
FeatureDataMismatchError,
match="The features in the given table do not match with the specified feature columns names of the neural network.",
):
- model.fit(
- Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).to_tabular_dataset("b"),
+ learned_model = model.fit(
+ Table.from_dict({"a": [0.1, 0, 0.2], "b": [0, 0.15, 0.5]}).to_tabular_dataset("b"),
)
+ learned_model.fit(Table.from_dict({"k": [0.1, 0, 0.2], "l": [0, 0.15, 0.5]}).to_tabular_dataset("k"))
def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None:
model = NeuralNetworkClassifier(
- InputConversionTable(["b", "c"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)],
OutputConversionTable(),
)
@@ -206,7 +236,7 @@ def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None:
def test_should_raise_if_fit_doesnt_batch_callback(self) -> None:
model = NeuralNetworkClassifier(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1)],
OutputConversionTable(),
)
@@ -228,7 +258,7 @@ def callback_was_called(self) -> bool:
def test_should_raise_if_fit_doesnt_epoch_callback(self) -> None:
model = NeuralNetworkClassifier(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1)],
OutputConversionTable(),
)
@@ -252,49 +282,49 @@ def callback_was_called(self) -> bool:
("input_conversion", "layers", "output_conversion", "error_msg"),
[
(
- InputConversionTable([], ""),
+ InputConversionTable(),
[FlattenLayer()],
OutputConversionImageToTable(),
r"The defined model uses an output conversion for images but no input conversion for images.",
),
(
- InputConversionTable([], ""),
+ InputConversionTable(),
[FlattenLayer()],
OutputConversionImageToColumn(),
r"The defined model uses an output conversion for images but no input conversion for images.",
),
(
- InputConversionTable([], ""),
+ InputConversionTable(),
[FlattenLayer()],
OutputConversionImageToImage(),
r"A NeuralNetworkClassifier cannot be used with images as output.",
),
(
- InputConversionTable([], ""),
+ InputConversionTable(),
[Convolutional2DLayer(1, 1)],
OutputConversionTable(),
r"You cannot use a 2-dimensional layer with 1-dimensional data.",
),
(
- InputConversionTable([], ""),
+ InputConversionTable(),
[ConvolutionalTranspose2DLayer(1, 1)],
OutputConversionTable(),
r"You cannot use a 2-dimensional layer with 1-dimensional data.",
),
(
- InputConversionTable([], ""),
+ InputConversionTable(),
[MaxPooling2DLayer(1)],
OutputConversionTable(),
r"You cannot use a 2-dimensional layer with 1-dimensional data.",
),
(
- InputConversionTable([], ""),
+ InputConversionTable(),
[AvgPooling2DLayer(1)],
OutputConversionTable(),
r"You cannot use a 2-dimensional layer with 1-dimensional data.",
),
(
- InputConversionTable([], ""),
+ InputConversionTable(),
[FlattenLayer()],
OutputConversionTable(),
r"You cannot use a 2-dimensional layer with 1-dimensional data.",
@@ -464,7 +494,7 @@ def test_should_raise_if_epoch_size_out_of_bounds(self, epoch_size: int) -> None
match=rf"epoch_size \(={epoch_size}\) is not inside \[1, \u221e\)\.",
):
NeuralNetworkRegressor(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1)],
OutputConversionTable(),
).fit(
@@ -485,7 +515,7 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None
match=rf"batch_size \(={batch_size}\) is not inside \[1, \u221e\)\.",
):
NeuralNetworkRegressor(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1)],
OutputConversionTable(),
).fit(
@@ -503,7 +533,7 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None
)
def test_should_raise_if_fit_function_returns_wrong_datatype(self, batch_size: int) -> None:
fitted_model = NeuralNetworkRegressor(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1)],
OutputConversionTable(),
).fit(
@@ -522,7 +552,7 @@ def test_should_raise_if_fit_function_returns_wrong_datatype(self, batch_size: i
)
def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_size: int) -> None:
fitted_model = NeuralNetworkRegressor(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1)],
OutputConversionTable(),
).fit(
@@ -535,7 +565,7 @@ def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_siz
def test_should_raise_if_model_has_not_been_fitted(self) -> None:
with pytest.raises(ModelNotFittedError, match="The model has not been fitted yet."):
NeuralNetworkRegressor(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1)],
OutputConversionTable(),
).predict(
@@ -544,7 +574,7 @@ def test_should_raise_if_model_has_not_been_fitted(self) -> None:
def test_should_raise_if_is_fitted_is_set_correctly(self) -> None:
model = NeuralNetworkRegressor(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1)],
OutputConversionTable(),
)
@@ -556,7 +586,7 @@ def test_should_raise_if_is_fitted_is_set_correctly(self) -> None:
def test_should_raise_if_test_features_mismatch(self) -> None:
model = NeuralNetworkRegressor(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1)],
OutputConversionTable(),
)
@@ -573,7 +603,7 @@ def test_should_raise_if_test_features_mismatch(self) -> None:
def test_should_raise_if_train_features_mismatch(self) -> None:
model = NeuralNetworkRegressor(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1)],
OutputConversionTable(),
)
@@ -581,13 +611,16 @@ def test_should_raise_if_train_features_mismatch(self) -> None:
FeatureDataMismatchError,
match="The features in the given table do not match with the specified feature columns names of the neural network.",
):
- model.fit(
+ trained_model = model.fit(
Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).to_tabular_dataset("b"),
)
+ trained_model.fit(
+ Table.from_dict({"k": [1, 0, 2], "l": [0, 15, 5]}).to_tabular_dataset("l"),
+ )
def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None:
model = NeuralNetworkRegressor(
- InputConversionTable(["b", "c"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)],
OutputConversionTable(),
)
@@ -600,7 +633,7 @@ def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None:
def test_should_raise_if_fit_doesnt_batch_callback(self) -> None:
model = NeuralNetworkRegressor(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1)],
OutputConversionTable(),
)
@@ -622,7 +655,7 @@ def callback_was_called(self) -> bool:
def test_should_raise_if_fit_doesnt_epoch_callback(self) -> None:
model = NeuralNetworkRegressor(
- InputConversionTable(["b"], "a"),
+ InputConversionTable(),
[ForwardLayer(input_size=1, output_size=1)],
OutputConversionTable(),
)
@@ -646,37 +679,37 @@ def callback_was_called(self) -> bool:
("input_conversion", "layers", "output_conversion", "error_msg"),
[
(
- InputConversionTable([], ""),
+ InputConversionTable(),
[FlattenLayer()],
OutputConversionImageToImage(),
r"The defined model uses an output conversion for images but no input conversion for images.",
),
(
- InputConversionTable([], ""),
+ InputConversionTable(),
[Convolutional2DLayer(1, 1)],
OutputConversionTable(),
r"You cannot use a 2-dimensional layer with 1-dimensional data.",
),
(
- InputConversionTable([], ""),
+ InputConversionTable(),
[ConvolutionalTranspose2DLayer(1, 1)],
OutputConversionTable(),
r"You cannot use a 2-dimensional layer with 1-dimensional data.",
),
(
- InputConversionTable([], ""),
+ InputConversionTable(),
[MaxPooling2DLayer(1)],
OutputConversionTable(),
r"You cannot use a 2-dimensional layer with 1-dimensional data.",
),
(
- InputConversionTable([], ""),
+ InputConversionTable(),
[AvgPooling2DLayer(1)],
OutputConversionTable(),
r"You cannot use a 2-dimensional layer with 1-dimensional data.",
),
(
- InputConversionTable([], ""),
+ InputConversionTable(),
[FlattenLayer()],
OutputConversionTable(),
r"You cannot use a 2-dimensional layer with 1-dimensional data.",
diff --git a/tests/safeds/ml/nn/test_output_conversion_time_series.py b/tests/safeds/ml/nn/test_output_conversion_time_series.py
new file mode 100644
index 000000000..4267c9827
--- /dev/null
+++ b/tests/safeds/ml/nn/test_output_conversion_time_series.py
@@ -0,0 +1,111 @@
+import sys
+
+import pytest
+from safeds.data.tabular.containers import Table
+from safeds.ml.nn import OutputConversionTimeSeries
+
+
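+# _data_conversion should fail with a clear error when window_size or forecast_horizon
+# is missing from the keyword arguments it receives.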
+def test_output_conversion_time_series() -> None:
+ import torch
+
+ with pytest.raises(
+ ValueError,
+ match=r"The window_size is not set. The data can only be converted if the window_size is provided as `int` in the kwargs.",
+ ):
+ ot = OutputConversionTimeSeries()
+ ot._data_conversion(
+ input_data=Table({"a": [1], "c": [1], "b": [1]}).to_time_series_dataset("a", "b"),
+ output_data=torch.Tensor([0]),
+ win=2,
+ kappa=3,
+ )
+
+
+def test_output_conversion_time_series_2() -> None:
+ import torch
+
+ with pytest.raises(
+ ValueError,
+ match=r"The forecast_horizon is not set. The data can only be converted if the forecast_horizon is provided as `int` in the kwargs.",
+ ):
+ ot = OutputConversionTimeSeries()
+ ot._data_conversion(
+ input_data=Table({"a": [1], "c": [1], "b": [1]}).to_time_series_dataset("a", "b"),
+ output_data=torch.Tensor([0]),
+ window_size=2,
+ kappa=3,
+ )
+
+
+class TestEq:
+
+ @pytest.mark.parametrize(
+ ("output_conversion_ts1", "output_conversion_ts2"),
+ [
+ (OutputConversionTimeSeries(), OutputConversionTimeSeries()),
+ (OutputConversionTimeSeries("1"), OutputConversionTimeSeries("1")),
+ (OutputConversionTimeSeries("2"), OutputConversionTimeSeries("2")),
+ ],
+ )
+ def test_should_be_equal(
+ self,
+ output_conversion_ts1: OutputConversionTimeSeries,
+ output_conversion_ts2: OutputConversionTimeSeries,
+ ) -> None:
+ assert output_conversion_ts1 == output_conversion_ts2
+
+ @pytest.mark.parametrize(
+ ("output_conversion_ts1", "output_conversion_ts2"),
+ [
+ (OutputConversionTimeSeries(), Table()),
+ (OutputConversionTimeSeries("2"), OutputConversionTimeSeries("1")),
+ ],
+ )
+ def test_should_not_be_equal(
+ self,
+ output_conversion_ts1: OutputConversionTimeSeries,
+ output_conversion_ts2: OutputConversionTimeSeries,
+ ) -> None:
+ assert output_conversion_ts1 != output_conversion_ts2
+
+
+class TestHash:
+
+ @pytest.mark.parametrize(
+ ("output_conversion_ts1", "output_conversion_ts2"),
+ [
+ (OutputConversionTimeSeries(), OutputConversionTimeSeries()),
+ (OutputConversionTimeSeries("1"), OutputConversionTimeSeries("1")),
+ (OutputConversionTimeSeries("2"), OutputConversionTimeSeries("2")),
+ ],
+ )
+ def test_hash_should_be_equal(
+ self,
+ output_conversion_ts1: OutputConversionTimeSeries,
+ output_conversion_ts2: OutputConversionTimeSeries,
+ ) -> None:
+ assert hash(output_conversion_ts1) == hash(output_conversion_ts2)
+
+ def test_hash_should_not_be_equal(self) -> None:
+ output_conversion_ts1 = OutputConversionTimeSeries("1")
+ output_conversion_ts2 = OutputConversionTimeSeries("2")
+ output_conversion_ts3 = OutputConversionTimeSeries("3")
+ assert hash(output_conversion_ts1) != hash(output_conversion_ts3)
+ assert hash(output_conversion_ts2) != hash(output_conversion_ts1)
+ assert hash(output_conversion_ts3) != hash(output_conversion_ts2)
+
+
+class TestSizeOf:
+
+ @pytest.mark.parametrize(
+ "output_conversion_ts",
+ [
+ OutputConversionTimeSeries("1"),
+ OutputConversionTimeSeries("2"),
+ OutputConversionTimeSeries("3"),
+ ],
+ )
+ def test_should_size_be_greater_than_normal_object(
+ self,
+ output_conversion_ts: OutputConversionTimeSeries,
+ ) -> None:
+ assert sys.getsizeof(output_conversion_ts) > sys.getsizeof(object())
diff --git a/tests/safeds/ml/nn/test_table_conversion.py b/tests/safeds/ml/nn/test_table_conversion.py
new file mode 100644
index 000000000..d0bee7b33
--- /dev/null
+++ b/tests/safeds/ml/nn/test_table_conversion.py
@@ -0,0 +1,10 @@
+from safeds.data.labeled.containers import TabularDataset
+from safeds.ml.nn import (
+ InputConversionTable,
+)
+
+
+def test_should_return_true_if_fit_data_matches_feature_names() -> None:
+ it = InputConversionTable()
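+ # _feature_names is set directly (internal attribute) to simulate a conversion that was already fitted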
+ it._feature_names = ["b"]
+ assert it._is_fit_data_valid(TabularDataset({"a": [1], "b": [1]}, "a"))