From c8113998c0dd2149cf45d83ad5400501d8066a3d Mon Sep 17 00:00:00 2001
From: Simon
Date: Fri, 16 Aug 2024 03:16:13 +0200
Subject: [PATCH] nn: enable hyperparameter search for image classifiers and
 tighten unsupported-task errors

---
 src/safeds/ml/nn/_model.py | 106 ++++++++++++++++++++-----------------
 1 file changed, 57 insertions(+), 49 deletions(-)

diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py
index 24e895ff3..4c26bad1a 100644
--- a/src/safeds/ml/nn/_model.py
+++ b/src/safeds/ml/nn/_model.py
@@ -315,10 +315,8 @@ def fit_by_exhaustive_search(
         if not self._contains_choices():
             raise FittingWithoutChoiceError
 
-        #if isinstance(train_data, TimeSeriesDataset):
-        #    raise LearningError("RNN-Hyperparameter optimization is currently not supported.")  # pragma: no cover
         if isinstance(train_data, ImageDataset):
-            raise LearningError("CNN-Hyperparameter optimization is currently not supported.")  # pragma: no cover
+            raise LearningError("Hyperparameter optimization is currently not supported for CNN Regression Tasks.")  # pragma: no cover
 
         _check_bounds("epoch_size", epoch_size, lower_bound=_ClosedBound(1))
         _check_bounds("batch_size", batch_size, lower_bound=_ClosedBound(1))
@@ -328,12 +326,11 @@ def fit_by_exhaustive_search(
 
         if isinstance(train_data, TabularDataset):
             (train_set, test_set) = self._data_split_table(train_data)
-
-        elif isinstance(train_data, TimeSeriesDataset):
+        #elif isinstance(train_data, TimeSeriesDataset):
+        else:  # train_data is a TimeSeriesDataset
             (train_set, test_set) = self._data_split_time_series(train_data)
-        else:  # train_data is ImageDataset
-            (train_set, test_set) = self._data_split_image(train_data)
-            pass
+        #else:  # train_data is ImageDataset
+        #    (train_set, test_set) = self._data_split_image(train_data)
 
         with ProcessPoolExecutor(max_workers=len(list_of_models), mp_context=mp.get_context("spawn")) as executor:
             futures = []
@@ -346,13 +343,10 @@ def fit_by_exhaustive_search(
 
         if isinstance(train_data, TabularDataset):
             return self._get_best_fnn_model(list_of_fitted_models, test_set, optimization_metric)
-        elif isinstance(train_data, TimeSeriesDataset):
+        #elif isinstance(train_data, TimeSeriesDataset):
+        else:  # train_data is a TimeSeriesDataset
             return self._get_best_rnn_model(list_of_fitted_models, train_set, test_set, optimization_metric)
-        else:
-            # Image Cross Validation
-            test_data = train_data
-            pass
+        #else:  # Image Data Splitting necessary
 
     def _data_split_table(self, data: TabularDataset) -> (TabularDataset, TabularDataset):
         [train_split, test_split] = data.to_table().split_rows(0.75)
@@ -387,39 +381,47 @@ def _get_best_fnn_model(
                 match optimization_metric:
                     case "mean_squared_error":
                         best_metric_value = RegressionMetrics.mean_squared_error(
-                            predicted=fitted_model.predict(test_features), expected=test_target)  # type: ignore[arg-type]
+                            predicted=fitted_model.predict(test_features),
+                            expected=test_target)  # type: ignore[arg-type]
                     case "mean_absolute_error":
                         best_metric_value = RegressionMetrics.mean_absolute_error(
-                            predicted=fitted_model.predict(test_features), expected=test_target)  # type: ignore[arg-type]
+                            predicted=fitted_model.predict(test_features),
+                            expected=test_target)  # type: ignore[arg-type]
                     case "median_absolute_deviation":
                         best_metric_value = RegressionMetrics.median_absolute_deviation(
-                            predicted=fitted_model.predict(test_features), expected=test_target)  # type: ignore[arg-type]
+                            predicted=fitted_model.predict(test_features),
+                            expected=test_target)  # type: ignore[arg-type]
                     case "coefficient_of_determination":
                         best_metric_value = RegressionMetrics.coefficient_of_determination(
-                            predicted=fitted_model.predict(test_features), expected=test_target)  # type: ignore[arg-type]
+                            predicted=fitted_model.predict(test_features),
+                            expected=test_target)  # type: ignore[arg-type]
             else:
                 match optimization_metric:
                     case "mean_squared_error":
                         error_of_fitted_model = RegressionMetrics.mean_squared_error(
-                            predicted=fitted_model.predict(test_features), expected=test_target)  # type: ignore[arg-type]
+                            predicted=fitted_model.predict(test_features),
+                            expected=test_target)  # type: ignore[arg-type]
                         if error_of_fitted_model < best_metric_value:
                             best_model = fitted_model  # pragma: no cover
                             best_metric_value = error_of_fitted_model  # pragma: no cover
                     case "mean_absolute_error":
                         error_of_fitted_model = RegressionMetrics.mean_absolute_error(
-                            predicted=fitted_model.predict(test_features), expected=test_target)  # type: ignore[arg-type]
+                            predicted=fitted_model.predict(test_features),
+                            expected=test_target)  # type: ignore[arg-type]
                         if error_of_fitted_model < best_metric_value:
                             best_model = fitted_model  # pragma: no cover
                             best_metric_value = error_of_fitted_model  # pragma: no cover
                     case "median_absolute_deviation":
                         error_of_fitted_model = RegressionMetrics.median_absolute_deviation(
-                            predicted=fitted_model.predict(test_features), expected=test_target)  # type: ignore[arg-type]
+                            predicted=fitted_model.predict(test_features),
+                            expected=test_target)  # type: ignore[arg-type]
                         if error_of_fitted_model < best_metric_value:
                             best_model = fitted_model  # pragma: no cover
                             best_metric_value = error_of_fitted_model  # pragma: no cover
                     case "coefficient_of_determination":
                         error_of_fitted_model = RegressionMetrics.coefficient_of_determination(
-                            predicted=fitted_model.predict(test_features), expected=test_target)  # type: ignore[arg-type]
+                            predicted=fitted_model.predict(test_features),
+                            expected=test_target)  # type: ignore[arg-type]
                         if error_of_fitted_model > best_metric_value:
                             best_model = fitted_model  # pragma: no cover
                             best_metric_value = error_of_fitted_model  # pragma: no cover
@@ -458,7 +460,8 @@ def _get_best_rnn_model(
         expected_values = []
         for i in range(size - (train_data.forecast_horizon + train_data.window_size)):
             if train_data.continuous:
-                label = test_target[i + train_data.window_size: i + train_data.window_size + train_data.forecast_horizon]
+                label = test_target[
+                    i + train_data.window_size: i + train_data.window_size + train_data.forecast_horizon]
             else:
                 label = test_target[i + train_data.window_size + train_data.forecast_horizon]
             expected_values.append(label)
@@ -518,8 +521,8 @@ def _get_best_rnn_model(
         best_model._is_fitted = True
         return best_model
 
-    def _data_split_image(self, train_data: ImageDataset) -> (ImageDataset, ImageDataset):
-        return train_data.split(0.75)
+    #def _data_split_image(self, train_data: ImageDataset) -> (ImageDataset, ImageDataset):
+    #    return train_data.split(0.75)
 
     def _get_models_for_all_choices(self) -> list[Self]:
         all_possible_layer_combinations: list[list] = [[]]
@@ -798,7 +801,8 @@ def fit(
             raise FittingWithChoiceError
 
         if isinstance(train_data, TimeSeriesDataset) and train_data.continuous:
-            raise NotImplementedError("Continuous Predictions are currently not supported for Time Series Classification.")
+            raise NotImplementedError(
+                "Continuous Predictions are currently not supported for Time Series Classification.")
 
         if not self._input_conversion._is_fit_data_valid(train_data):
             raise FeatureDataMismatchError
@@ -895,7 +899,7 @@ def fit_by_exhaustive_search(
         FittingWithoutChoiceError
             When calling this method on a model without hyperparameter choices.
         LearningError
-            If the training data contains invalid values or if the training failed. Currently raised, when calling this on RNNs or CNNs as well.
+            If the training data contains invalid values or if the training failed.
         """
 
         _init_default_device()
@@ -904,8 +908,8 @@ def fit_by_exhaustive_search(
 
         if isinstance(train_data, TimeSeriesDataset) and train_data.continuous:
             raise NotImplementedError("Continuous Predictions are currently not supported for Time Series Classification.")
-        #if isinstance(train_data, ImageDataset):
-        #    raise LearningError("CNN-Hyperparameter optimization is currently not supported.")  # pragma: no cover
+        if isinstance(train_data, ImageDataset) and isinstance(self._input_conversion, InputConversionImageToImage):
+            raise NotImplementedError("Hyperparameter Optimization for Images as Output is currently not supported.")
 
         _check_bounds("epoch_size", epoch_size, lower_bound=_ClosedBound(1))
         _check_bounds("batch_size", batch_size, lower_bound=_ClosedBound(1))
@@ -933,26 +937,25 @@ def fit_by_exhaustive_search(
         if isinstance(train_data, TabularDataset):
             return self._get_best_fnn_model(list_of_fitted_models, test_set, optimization_metric, positive_class)
         elif isinstance(train_data, TimeSeriesDataset):
-            return self._get_best_rnn_model(list_of_fitted_models, train_set, test_set, optimization_metric, positive_class)
+            return self._get_best_rnn_model(list_of_fitted_models, train_set, test_set, optimization_metric,
+                                            positive_class)
         else:
             if isinstance(self._input_conversion, InputConversionImageToColumn):
-                return self._get_best_cnn_model_column(list_of_fitted_models, train_set, optimization_metric, positive_class)
-            elif isinstance(self._input_conversion, InputConversionImageToTable):
-                return self._get_best_cnn_model_table(list_of_fitted_models, train_set, optimization_metric, positive_class)
-            else:  # ImageToImage
-                raise NotImplementedError("Hyperparameter Optimization for Image to Image is currently not supported.")
-
+                return self._get_best_cnn_model_column(list_of_fitted_models, train_set, optimization_metric,
+                                                       positive_class)
+            else:  # ImageToTable
+                return self._get_best_cnn_model_table(list_of_fitted_models, train_set, optimization_metric,
+                                                      positive_class)
     def _data_split_table(self, data: TabularDataset) -> (TabularDataset, TabularDataset):
         [train_split, test_split] = data.to_table().split_rows(0.75)
         train_data = train_split.to_tabular_dataset(
             target_name=data.target.name,
             extra_names=data.extras.column_names,
         )
         test_data = test_split.to_tabular_dataset(
             target_name=train_data.target.name,
             extra_names=train_data.extras.column_names,
         )
-        #target_col = train_data.target
         return (train_data, test_data)
 
     def _data_split_time_series(self, data: TimeSeriesDataset) -> (TimeSeriesDataset, Table):
@@ -985,25 +988,29 @@ def _get_best_fnn_model(
                 best_model = fitted_model
                 match optimization_metric:
                     case "accuracy":
-                        best_metric_value = ClassificationMetrics.accuracy(predicted=fitted_model.predict(test_features),
-                                                                           expected=test_target)  # type: ignore[arg-type]
+                        best_metric_value = ClassificationMetrics.accuracy(
+                            predicted=fitted_model.predict(test_features),
+                            expected=test_target)  # type: ignore[arg-type]
                     case "precision":
-                        best_metric_value = ClassificationMetrics.precision(predicted=fitted_model.predict(test_features),
-                                                                            expected=test_target,
-                                                                            positive_class=positive_class)  # type: ignore[arg-type]
+                        best_metric_value = ClassificationMetrics.precision(
+                            predicted=fitted_model.predict(test_features),
+                            expected=test_target,
+                            positive_class=positive_class)  # type: ignore[arg-type]
case "recall": best_metric_value = ClassificationMetrics.recall(predicted=fitted_model.predict(test_features), expected=test_target, positive_class=positive_class) # type: ignore[arg-type] case "f1_score": - best_metric_value = ClassificationMetrics.f1_score(predicted=fitted_model.predict(test_features), - expected=test_target, - positive_class=positive_class) # type: ignore[arg-type] + best_metric_value = ClassificationMetrics.f1_score( + predicted=fitted_model.predict(test_features), + expected=test_target, + positive_class=positive_class) # type: ignore[arg-type] else: match optimization_metric: case "accuracy": error_of_fitted_model = ClassificationMetrics.accuracy( - predicted=fitted_model.predict(test_features), expected=test_target) # type: ignore[arg-type] + predicted=fitted_model.predict(test_features), + expected=test_target) # type: ignore[arg-type] if error_of_fitted_model > best_metric_value: best_model = fitted_model # pragma: no cover best_metric_value = error_of_fitted_model # pragma: no cover @@ -1015,9 +1022,10 @@ def _get_best_fnn_model( best_model = fitted_model # pragma: no cover best_metric_value = error_of_fitted_model # pragma: no cover case "recall": - error_of_fitted_model = ClassificationMetrics.recall(predicted=fitted_model.predict(test_features), - expected=test_target, - positive_class=positive_class) # type: ignore[arg-type] + error_of_fitted_model = ClassificationMetrics.recall( + predicted=fitted_model.predict(test_features), + expected=test_target, + positive_class=positive_class) # type: ignore[arg-type] if error_of_fitted_model > best_metric_value: best_model = fitted_model # pragma: no cover best_metric_value = error_of_fitted_model # pragma: no cover