From 205da70352b01896d5bd446b600e447f622cc7a2 Mon Sep 17 00:00:00 2001 From: baraline Date: Sat, 2 Aug 2025 14:37:44 +0200 Subject: [PATCH] Moving functions to utils --- aeon/base/_base_collection.py | 203 +--------------- aeon/base/_base_series.py | 166 +------------ aeon/utils/preprocessing.py | 432 ++++++++++++++++++++++++++++++++++ 3 files changed, 454 insertions(+), 347 deletions(-) create mode 100644 aeon/utils/preprocessing.py diff --git a/aeon/base/_base_collection.py b/aeon/base/_base_collection.py index 60996e7d8a..ff1b856772 100644 --- a/aeon/base/_base_collection.py +++ b/aeon/base/_base_collection.py @@ -17,26 +17,16 @@ class name: BaseCollectionEstimator fitted model/strategy - by convention, any attributes ending in "_" fitted state flag - is_fitted (property) fitted state inspection - check_is_fitted() + """ from abc import abstractmethod -import numpy as np - from aeon.base._base import BaseAeonEstimator -from aeon.utils.conversion import ( - convert_collection, - resolve_equal_length_inner_type, - resolve_unequal_length_inner_type, -) +from aeon.utils.preprocessing import preprocess_collection from aeon.utils.validation.collection import ( - get_n_cases, get_n_channels, get_n_timepoints, - get_type, - has_missing, - is_equal_length, - is_univariate, ) @@ -63,6 +53,7 @@ class BaseCollectionEstimator(BaseAeonEstimator): @abstractmethod def __init__(self): self.metadata_ = {} # metadata/properties of data seen in fit + self._n_jobs = 1 super().__init__() @@ -110,156 +101,17 @@ def _preprocess_collection(self, X, store_metadata=True): >>> X2.shape (10, 1, 20) """ - if isinstance(X, list) and isinstance(X[0], np.ndarray): - X = self._reshape_np_list(X) - - meta = self._check_X(X) - if len(self.metadata_) == 0 and store_metadata: + result = preprocess_collection( + X, + self.get_tags(), + return_metadata=store_metadata, + ) + if store_metadata: + X, meta = result self.metadata_ = meta - - return self._convert_X(X) - - def _check_X(self, X): - """ - 
Check classifier input X is valid. - - Check if the input data is a compatible type, and that this estimator is - able to handle the data characteristics. - This is done by matching the capabilities of the estimator against the metadata - for X i.e., univariate/multivariate, equal length/unequal length and no missing - values/missing values. - - Parameters - ---------- - X : collection - See aeon.utils.COLLECTIONS_DATA_TYPES for details on aeon supported - data structures. - - Returns - ------- - metadata : dict - Metadata about X, with flags: - metadata["multivariate"] : whether X has more than one channel or not - metadata["missing_values"] : whether X has missing values or not - metadata["unequal_length"] : whether X contains unequal length series. - metadata["n_cases"] : number of cases in X - metadata["n_channels"] : number of channels in X - metadata["n_timepoints"] : number of timepoints in X if equal length, else - None - - Raises - ------ - ValueError - If X is an invalid type or has characteristics that the estimator cannot - handle. - - See Also - -------- - _convert_X : - Function that converts X after it has been checked. 
- - Examples - -------- - >>> from aeon.testing.mock_estimators import MockClassifierFullTags - >>> from aeon.testing.data_generation import make_example_3d_numpy - >>> clf = MockClassifierFullTags() - >>> X, _ = make_example_3d_numpy(n_channels=3) # X is equal length, multivariate - >>> meta = clf._check_X(X) # Classifier can handle this - """ - # check if X is a valid type - get_type(X) - - metadata = self._get_X_metadata(X) - # Check classifier capabilities for X - allow_multivariate = self.get_tag("capability:multivariate") - allow_missing = self.get_tag("capability:missing_values") - allow_unequal = self.get_tag("capability:unequal_length") - - # Check capabilities vs input - problems = [] - if metadata["missing_values"] and not allow_missing: - problems += ["missing values"] - if metadata["multivariate"] and not allow_multivariate: - problems += ["multivariate series"] - if metadata["unequal_length"] and not allow_unequal: - problems += ["unequal length series"] - - if problems: - # construct error message - problems_and = " and ".join(problems) - msg = ( - f"Data seen by instance of {type(self).__name__} has {problems_and}, " - f"but {type(self).__name__} cannot handle these characteristics. " - ) - raise ValueError(msg) - - return metadata - - def _convert_X(self, X): - """ - Convert X to type defined by tag X_inner_type. - - If the input data is already an allowed type, it is returned unchanged. - - If multiple types are allowed by self, then the best one for the type of input - data is selected. So, for example, if X_inner_tag is ["np-list", "numpy3D"] - and an df-list is passed, it will be converted to numpy3D if the series - are equal length, and np-list if the series are unequal length. - - Parameters - ---------- - X : collection - See aeon.utils.COLLECTIONS_DATA_TYPES for details on aeon supported - data structures. - - Returns - ------- - X : collection - Converted X. A data structure of type self.get_tag("X_inner_type"). 
- - See Also - -------- - _check_X : - Function that checks X is valid and finds metadata. - - Examples - -------- - >>> from aeon.testing.mock_estimators import MockClassifier - >>> from aeon.testing.data_generation import make_example_3d_numpy_list - >>> from aeon.utils.validation import get_type - >>> clf = MockClassifier() - >>> X, _ = make_example_3d_numpy_list(max_n_timepoints=8) - >>> get_type(X) - 'np-list' - >>> clf.get_tag("X_inner_type") - 'numpy3D' - >>> X2 = clf._convert_X(X) - >>> get_type(X2) - 'numpy3D' - """ - inner_type = self.get_tag("X_inner_type") - if not isinstance(inner_type, list): - inner_type = [inner_type] - input_type = get_type(X) - - # Check if we need to convert X, return if not - if input_type in inner_type: - return X - - if len(self.metadata_) == 0: - metadata = self._get_X_metadata(X) else: - metadata = self.metadata_ - - # Convert X to X_inner_type if possible - # If self can handle more than one internal type, resolve correct conversion - # If unequal, choose data structure that can hold unequal - if metadata["unequal_length"]: - inner_type = resolve_unequal_length_inner_type(inner_type) - else: - inner_type = resolve_equal_length_inner_type(inner_type) - - return convert_collection(X, inner_type) + X = result + return X def _check_shape(self, X): """ @@ -297,34 +149,3 @@ def _check_shape(self, X): "number of channels in train set was ", f"{self.metadata_['n_channels']} but in predict it is {nc}.", ) - - @staticmethod - def _get_X_metadata(X): - # Get and store X meta data. 
- metadata = {} - metadata["multivariate"] = not is_univariate(X) - metadata["missing_values"] = has_missing(X) - metadata["unequal_length"] = not is_equal_length(X) - metadata["n_cases"] = get_n_cases(X) - metadata["n_channels"] = get_n_channels(X) - metadata["n_timepoints"] = ( - None if metadata["unequal_length"] else get_n_timepoints(X) - ) - return metadata - - @staticmethod - def _reshape_np_list(X): - """Reshape 1D numpy to be 2D.""" - reshape = False - for x in X: - if x.ndim == 1: - reshape = True - break - if reshape: - X2 = [] - for x in X: - if x.ndim == 1: - x = x.reshape(1, -1) - X2.append(x) - return X2 - return X diff --git a/aeon/base/_base_series.py b/aeon/base/_base_series.py index c4fbb5aa30..204dc6c2f0 100644 --- a/aeon/base/_base_series.py +++ b/aeon/base/_base_series.py @@ -33,11 +33,8 @@ from abc import abstractmethod -import numpy as np -import pandas as pd - from aeon.base._base import BaseAeonEstimator -from aeon.utils.data_types import VALID_SERIES_INNER_TYPES +from aeon.utils.preprocessing import preprocess_series class BaseSeriesEstimator(BaseAeonEstimator): @@ -112,159 +109,16 @@ def _preprocess_series(self, X, axis, store_metadata): X: one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES Input time series with data structure of type self.get_tag("X_inner_type"). """ - meta = self._check_X(X, axis) + result = preprocess_series( + X, + axis=axis, + tags=self.get_tags(), + estimator_axis=self.axis, + return_metadata=store_metadata, + ) if store_metadata: + X, meta = result self.metadata_ = meta - return self._convert_X(X, axis) - - def _check_X(self, X, axis: int = 0): - """Check input X is valid. - - Check if the input data is a compatible type, and that this estimator is - able to handle the data characteristics. This is done by matching the - capabilities of the estimator against the metadata for X for - univariate/multivariate and no missing values/missing values. 
- - Parameters - ---------- - X: one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES - A valid aeon time series data structure. See - aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. - axis: int - The time point axis of the input series if it is 2D. If ``axis==0``, it is - assumed each column is a time series and each row is a time point. i.e. the - shape of the data is ``(n_timepoints,n_channels)``. ``axis==1`` indicates - the time series are in rows, i.e. the shape of the data is - ``(n_channels,n_timepoints)``. - - Returns - ------- - metadata: dict - Metadata about X, with flags: - metadata["multivariate"]: whether X has more than one channel or not - metadata["n_channels"]: number of channels in X - metadata["missing_values"]: whether X has missing values or not - """ - if axis > 1 or axis < 0: - raise ValueError(f"Input axis should be 0 or 1, saw {axis}") - - # Checks: check valid dtype - if isinstance(X, np.ndarray): - if not ( - issubclass(X.dtype.type, np.integer) - or issubclass(X.dtype.type, np.floating) - ): - raise ValueError("dtype for np.ndarray must be float or int") - elif isinstance(X, pd.Series): - if not pd.api.types.is_numeric_dtype(X): - raise ValueError("pd.Series dtype must be numeric") - elif isinstance(X, pd.DataFrame): - if not all(pd.api.types.is_numeric_dtype(X[col]) for col in X.columns): - raise ValueError("pd.DataFrame dtype must be numeric") else: - raise ValueError( - f"Input type of X should be one of {VALID_SERIES_INNER_TYPES}, " - f"saw {type(X)}" - ) - - metadata = {} - - # check if multivariate - channel_idx = 0 if axis == 1 else 1 - if X.ndim > 2: - raise ValueError( - "X must have at most 2 dimensions for multivariate data, optionally 1 " - f"for univarate data. 
Found {X.ndim} dimensions" - ) - elif X.ndim > 1 and X.shape[channel_idx] > 1: - metadata["multivariate"] = True - else: - metadata["multivariate"] = False - - metadata["n_channels"] = X.shape[channel_idx] if X.ndim > 1 else 1 - - # check if has missing values - if isinstance(X, np.ndarray): - metadata["missing_values"] = np.isnan(X).any() - elif isinstance(X, pd.Series): - metadata["missing_values"] = X.isna().any() - else: - metadata["missing_values"] = X.isna().any().any() - - allow_multivariate = self.get_tag("capability:multivariate") - allow_univariate = self.get_tag("capability:univariate") - allow_missing = self.get_tag("capability:missing_values") - if metadata["missing_values"] and not allow_missing: - raise ValueError( - f"Missing values not supported by {self.__class__.__name__}" - ) - if metadata["multivariate"] and not allow_multivariate: - raise ValueError( - f"Multivariate data not supported by {self.__class__.__name__}" - ) - if not metadata["multivariate"] and not allow_univariate: - raise ValueError( - f"Univariate data not supported by {self.__class__.__name__}" - ) - - return metadata - - def _convert_X(self, X, axis): - """Convert input X to internal estimator datatype. - - Converts input X to the internal data type of the estimator using - self.get_tag("X_inner_type"). 1D numpy arrays are converted to 2D, - and the data will be transposed if the input axis does not match that of the - estimator. - - Attempting to convert to a pd.Series for multivariate data or estimators will - raise an error. - - Parameters - ---------- - X: one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES - A valid aeon time series data structure. See - aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. - axis: int - The time point axis of the input series if it is 2D. If ``axis==0``, it is - assumed each column is a time series and each row is a time point. i.e. the - shape of the data is ``(n_timepoints, n_channels)``. 
``axis==1`` indicates - the time series are in rows, i.e. the shape of the data is - ``(n_channels, n_timepoints)``. - - Returns - ------- - X: one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES - Input time series with data structure of type self.get_tag("X_inner_type"). - """ - if axis > 1 or axis < 0: - raise ValueError(f"Input axis should be 0 or 1, saw {axis}") - - inner_type = self.get_tag("X_inner_type") - if not isinstance(inner_type, list): - inner_type = [inner_type] - inner_names = [i.split(".")[-1] for i in inner_type] - - input = type(X).__name__ - if input not in inner_names: - if inner_names[0] == "ndarray": - X = X.to_numpy() - elif inner_names[0] == "DataFrame": - # converting a 1d array will create a 2d array in axis 0 format - transpose = False - if X.ndim == 1 and axis == 1: - transpose = True - X = pd.DataFrame(X) - if transpose: - X = X.T - else: - raise ValueError( - f"Unsupported inner type {inner_names[0]} derived from {inner_type}" - ) - - if X.ndim > 1 and self.axis != axis: - X = X.T - elif X.ndim == 1 and isinstance(X, np.ndarray): - X = X[np.newaxis, :] if self.axis == 1 else X[:, np.newaxis] - + X = result return X diff --git a/aeon/utils/preprocessing.py b/aeon/utils/preprocessing.py new file mode 100644 index 0000000000..ee4200677c --- /dev/null +++ b/aeon/utils/preprocessing.py @@ -0,0 +1,432 @@ +""" +Standalone preprocessing functions for time series data. + +This module contains preprocessing functions that can be used independently +of specific estimator classes. These functions handle validation, metadata +extraction, and format conversion for both single series and collections. 
+""" + +__maintainer__ = ["TonyBagnall", "MatthewMiddlehurst"] +__all__ = ["preprocess_series", "preprocess_collection"] + +import numpy as np +import pandas as pd + +from aeon.utils.conversion import ( + convert_collection, + resolve_equal_length_inner_type, + resolve_unequal_length_inner_type, +) +from aeon.utils.data_types import VALID_SERIES_INNER_TYPES +from aeon.utils.validation.collection import ( + get_n_cases, + get_n_channels, + get_n_timepoints, + get_type, + has_missing, + is_equal_length, + is_univariate, +) + + +def preprocess_series( + X, + axis: int, + tags: dict, + estimator_axis: int, + return_metadata: bool = True, +): + """Preprocess input X for single time series estimators. + + Checks the characteristics of X, validates that the estimator can handle + the data, stores metadata, and converts X to the specified inner type. + + Parameters + ---------- + X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES + A valid aeon time series data structure. See + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. + axis : int + The time point axis of the input series if it is 2D. If ``axis==0``, it is + assumed each column is a time series and each row is a time point. i.e. the + shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates + the time series are in rows, i.e. the shape of the data is + ``(n_channels, n_timepoints)``. + tags : dict + Dictionary containing estimator tags and capabilities with keys: + - "capability:univariate": bool + - "capability:multivariate": bool + - "capability:missing_values": bool (optional, defaults to False) + estimator_axis : int + The target axis that the estimator expects. If ``estimator_axis==0``, + output will have shape ``(n_timepoints, n_channels)``. If ``estimator_axis==1``, + output will have shape ``(n_channels, n_timepoints)``. + return_metadata : bool, default=True + Whether to return the metadata dict about X. 
+ + Returns + ------- + X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES + Input time series with data structure of type inner_type. + metadata : dict (if return_metadata=True) + Metadata about X, with flags: + - metadata["multivariate"]: whether X has more than one channel or not + - metadata["n_channels"]: number of channels in X + - metadata["missing_values"]: whether X has missing values or not + """ + inner_type = tags.get("X_inner_type") + metadata = _check_series(X, axis, tags) + X_converted = _convert_series(X, axis, inner_type, estimator_axis) + + if return_metadata: + return X_converted, metadata + else: + return X_converted + + +def preprocess_collection(X, tags, return_metadata=True): + """Preprocess input X for collection-based estimators. + + 1. Checks the characteristics of X and validates estimator capabilities + 2. Returns metadata about X if return_metadata is True + 3. Converts X to inner_type if necessary + + Parameters + ---------- + X : collection + See aeon.utils.COLLECTIONS_DATA_TYPES for details on aeon supported + data structures. + tags : dict + Dictionary containing estimator tags and capabilities with keys: + - "capability:univariate": bool + - "capability:multivariate": bool + - "capability:unequal_length": bool + - "capability:missing_values": bool (optional, defaults to False) + - "X_inner_type": str or list of str + return_metadata : bool, default=True + Whether to return the metadata dict about X. + + Returns + ------- + X : collection + Processed X. A data structure of type inner_type. 
+ metadata : dict (if return_metadata=True) + Metadata about X, with flags: + - metadata["multivariate"]: whether X has more than one channel or not + - metadata["missing_values"]: whether X has missing values or not + - metadata["unequal_length"]: whether X contains unequal length series + - metadata["n_cases"]: number of cases in X + - metadata["n_channels"]: number of channels in X + - metadata["n_timepoints"]: number of timepoints in X if equal length, else None + + Raises + ------ + ValueError + If X is an invalid type or has characteristics that the estimator cannot + handle. + """ + inner_type = tags.get("X_inner_type") + if isinstance(X, list) and isinstance(X[0], np.ndarray): + X = _reshape_np_list(X) + + metadata = _check_collection(X, tags) + X_converted = _convert_collection_type(X, inner_type, metadata) + + if return_metadata: + return X_converted, metadata + else: + return X_converted + + +def _check_series(X, axis, tags): + """Check input X is valid for series estimators. + + Check if the input data is a compatible type, and that the estimator is + able to handle the data characteristics. This is done by matching the + capabilities of the estimator against the metadata for X for + univariate/multivariate and no missing values/missing values. + + Parameters + ---------- + X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES + A valid aeon time series data structure. + axis : int + The time point axis of the input series if it is 2D. + tags : dict + Dictionary containing estimator capabilities. 
+ + Returns + ------- + metadata : dict + Metadata about X, with flags: + - metadata["multivariate"]: whether X has more than one channel or not + - metadata["n_channels"]: number of channels in X + - metadata["missing_values"]: whether X has missing values or not + """ + if axis > 1 or axis < 0: + raise ValueError(f"Input axis should be 0 or 1, saw {axis}") + + # Checks: check valid dtype + if isinstance(X, np.ndarray): + if not ( + issubclass(X.dtype.type, np.integer) + or issubclass(X.dtype.type, np.floating) + ): + raise ValueError("dtype for np.ndarray must be float or int") + elif isinstance(X, pd.Series): + if not pd.api.types.is_numeric_dtype(X): + raise ValueError("pd.Series dtype must be numeric") + elif isinstance(X, pd.DataFrame): + if not all(pd.api.types.is_numeric_dtype(X[col]) for col in X.columns): + raise ValueError("pd.DataFrame dtype must be numeric") + else: + raise ValueError( + f"Input type of X should be one of {VALID_SERIES_INNER_TYPES}, " + f"saw {type(X)}" + ) + + # Validate dimensionality + if X.ndim > 2: + raise ValueError( + "X must have at most 2 dimensions for multivariate data, optionally 1 " + f"for univariate data. Found {X.ndim} dimensions" + ) + + metadata = _get_series_metadata(X, axis) + + # Check capabilities + allow_multivariate = tags.get("capability:multivariate", False) + allow_univariate = tags.get("capability:univariate", True) + allow_missing = tags.get("capability:missing_values", False) + + if metadata["missing_values"] and not allow_missing: + raise ValueError("Missing values not supported by estimator") + if metadata["multivariate"] and not allow_multivariate: + raise ValueError("Multivariate data not supported by estimator") + if not metadata["multivariate"] and not allow_univariate: + raise ValueError("Univariate data not supported by estimator") + + return metadata + + +def _convert_series(X, axis, inner_type, estimator_axis): + """Convert input X to internal estimator datatype. 
+ + Converts input X to the specified internal data type. 1D numpy arrays are + converted to 2D, and the data will be transposed if the input axis does not + match the target axis. + + Parameters + ---------- + X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES + A valid aeon time series data structure. + inner_type : str or list of str + The desired internal data type(s). + estimator_axis : int + The target axis that the estimator expects. + + Returns + ------- + X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES + Input time series with data structure of type inner_type. + """ + if axis > 1 or axis < 0: + raise ValueError(f"Input axis should be 0 or 1, saw {axis}") + + if not isinstance(inner_type, list): + inner_type = [inner_type] + inner_names = [i.split(".")[-1] for i in inner_type] + + input_type = type(X).__name__ + if input_type not in inner_names: + if inner_names[0] == "ndarray": + X = X.to_numpy() + elif inner_names[0] == "DataFrame": + # converting a 1d array will create a 2d array in axis 0 format + transpose = False + if X.ndim == 1 and axis == 1: + transpose = True + X = pd.DataFrame(X) + if transpose: + X = X.T + else: + raise ValueError( + f"Unsupported inner type {inner_names[0]} derived from {inner_type}" + ) + + if X.ndim > 1 and estimator_axis != axis: + X = X.T + elif X.ndim == 1 and isinstance(X, np.ndarray): + X = X[np.newaxis, :] if estimator_axis == 1 else X[:, np.newaxis] + + return X + + +def _check_collection(X, tags): + """Check collection input X is valid. + + Check if the input data is a compatible type, and that the estimator is + able to handle the data characteristics. + + Parameters + ---------- + X : collection + See aeon.utils.COLLECTIONS_DATA_TYPES for details on aeon supported + data structures. + tags : dict + Dictionary containing estimator capabilities. + + Returns + ------- + metadata : dict + Metadata about X. 
+ + Raises + ------ + ValueError + If X is an invalid type or has characteristics that the estimator cannot + handle. + """ + # check if X is a valid type + get_type(X) + + metadata = _get_collection_metadata(X) + + # Check estimator capabilities for X + allow_multivariate = tags.get("capability:multivariate", False) + allow_missing = tags.get("capability:missing_values", False) + allow_unequal = tags.get("capability:unequal_length", False) + + # Check capabilities vs input + problems = [] + if metadata["missing_values"] and not allow_missing: + problems += ["missing values"] + if metadata["multivariate"] and not allow_multivariate: + problems += ["multivariate series"] + if metadata["unequal_length"] and not allow_unequal: + problems += ["unequal length series"] + + if problems: + # construct error message + problems_and = " and ".join(problems) + msg = ( + f"Data has {problems_and}, but the estimator cannot handle " + f"these characteristics due to having tags : {tags}. " + ) + raise ValueError(msg) + + return metadata + + +def _convert_collection_type(X, inner_type, metadata): + """Convert X to type defined by inner_type. + + If the input data is already an allowed type, it is returned unchanged. + + Parameters + ---------- + X : collection + See aeon.utils.COLLECTIONS_DATA_TYPES for details on aeon supported + data structures. + inner_type : str or list of str + The desired internal data type(s). + metadata : dict + Metadata about X. + + Returns + ------- + X : collection + Converted X. A data structure of type inner_type. 
+ """ + if not isinstance(inner_type, list): + inner_type = [inner_type] + input_type = get_type(X) + + # Check if we need to convert X, return if not + if input_type in inner_type: + return X + + # Convert X to inner_type if possible + # If estimator can handle more than one internal type, resolve correct conversion + # If unequal, choose data structure that can hold unequal + if metadata["unequal_length"]: + inner_type = resolve_unequal_length_inner_type(inner_type) + else: + inner_type = resolve_equal_length_inner_type(inner_type) + + return convert_collection(X, inner_type) + + +def _get_collection_metadata(X): + """Get and store X meta data.""" + metadata = {} + metadata["multivariate"] = not is_univariate(X) + metadata["missing_values"] = has_missing(X) + metadata["unequal_length"] = not is_equal_length(X) + metadata["n_cases"] = get_n_cases(X) + metadata["n_channels"] = get_n_channels(X) + metadata["n_timepoints"] = ( + None if metadata["unequal_length"] else get_n_timepoints(X) + ) + return metadata + + +def _get_series_metadata(X, axis): + """Get and store series metadata. + + Parameters + ---------- + X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES + A valid aeon time series data structure. + axis : int + The time point axis of the input series if it is 2D. If ``axis==0``, it is + assumed each column is a time series and each row is a time point. i.e. the + shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates + the time series are in rows, i.e. the shape of the data is + ``(n_channels, n_timepoints)``. 
+ + Returns + ------- + metadata : dict + Metadata about X, with flags: + - metadata["multivariate"]: whether X has more than one channel or not + - metadata["missing_values"]: whether X has missing values or not + - metadata["n_channels"]: number of channels in X + """ + metadata = {} + + # check if multivariate + channel_idx = 0 if axis == 1 else 1 + if X.ndim > 1 and X.shape[channel_idx] > 1: + metadata["multivariate"] = True + else: + metadata["multivariate"] = False + + metadata["n_channels"] = X.shape[channel_idx] if X.ndim > 1 else 1 + + # check if has missing values + if isinstance(X, np.ndarray): + metadata["missing_values"] = np.isnan(X).any() + elif isinstance(X, pd.Series): + metadata["missing_values"] = X.isna().any() + else: # pd.DataFrame + metadata["missing_values"] = X.isna().any().any() + + return metadata + + +def _reshape_np_list(X): + """Reshape 1D numpy to be 2D.""" + reshape = False + for x in X: + if x.ndim == 1: + reshape = True + break + if reshape: + X2 = [] + for x in X: + if x.ndim == 1: + x = x.reshape(1, -1) + X2.append(x) + return X2 + return X