From deecedaa0dbea89a26012f2ec57241e968f6cbd5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 21 Jan 2026 06:25:05 +0000 Subject: [PATCH 1/4] Initial plan From 79410bbf4df3276a047008fa6d30dd452adb32e2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 21 Jan 2026 06:40:23 +0000 Subject: [PATCH 2/4] Add multioutput_train_size parameter for manual validation set specification in multi-output tasks Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com> --- flaml/automl/automl.py | 47 ++++++++++++++++++++++++++++++++++ test/automl/test_regression.py | 35 +++++++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py index 1bfdc2c166..e247304c74 100644 --- a/flaml/automl/automl.py +++ b/flaml/automl/automl.py @@ -361,6 +361,11 @@ def custom_metric( } ``` mlflow_logging: boolean, default=True | Whether to log the training results to mlflow. Not valid if mlflow is not installed. + multioutput_train_size: int, float or None, default=None | For multi-output regression tasks with + "holdout" evaluation, allows manual specification of validation set by concatenating training and + validation data and specifying where to split. If int, represents the number of samples in the + training set. If float (between 0.0 and 1.0), represents the proportion of the dataset to include + in the training set. If None, no split is performed. Only used when X_val and y_val are not provided. """ if ERROR: @@ -419,6 +424,7 @@ def custom_metric( settings["custom_hp"] = settings.get("custom_hp", {}) settings["skip_transform"] = settings.get("skip_transform", False) settings["mlflow_logging"] = settings.get("mlflow_logging", True) + settings["multioutput_train_size"] = settings.get("multioutput_train_size", None) self._estimator_type = "classifier" if settings["task"] in CLASSIFICATION else "regressor" self.best_run_id = None @@ -1720,6 +1726,28 @@ def metric_constraints(self) -> list: """ return self._metric_constraints + def _train_val_split(self, X, y, train_size): + """Split concatenated training and validation data. + + Args: + X: Combined training and validation features + y: Combined training and validation labels + train_size: int or float - if int, number of samples for training set; + if float, proportion of samples for training set + + Returns: + X_train, X_val, y_train, y_val + """ + if isinstance(train_size, float): + train_size = int(len(X) * train_size) + + X_train = X[:train_size] + X_val = X[train_size:] + y_train = y[:train_size] + y_val = y[train_size:] + + return X_train, X_val, y_train, y_val + def _prepare_data(self, eval_method, split_ratio, n_splits): self._state.task.prepare_data( self._state, @@ -1793,6 +1821,7 @@ def fit( mlflow_logging=None, fit_kwargs_by_estimator=None, mlflow_exp_name=None, + multioutput_train_size=None, **fit_kwargs, ): """Find a model for a given task. @@ -2110,6 +2139,11 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds): } } ``` + multioutput_train_size: int, float or None, default=None | For multi-output regression tasks with + "holdout" evaluation, allows manual specification of validation set by concatenating training and + validation data and specifying where to split. If int, represents the number of samples in the + training set. If float (between 0.0 and 1.0), represents the proportion of the dataset to include + in the training set. If None, no split is performed. Only used when X_val and y_val are not provided. **fit_kwargs: Other key word arguments to pass to fit() function of the searched learners, such as sample_weight. Below are a few examples of @@ -2312,6 +2346,19 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds): self.mlflow_integration.only_history = True except KeyError: logger.info("Not in Fabric, Skipped") + + # Handle multioutput_train_size parameter + multioutput_train_size = ( + self._settings.get("multioutput_train_size") if multioutput_train_size is None else multioutput_train_size + ) + if multioutput_train_size is not None and X_val is None and y_val is None: + # Split the concatenated training data into train and validation sets + X_train, X_val, y_train, y_val = self._train_val_split(X_train, y_train, multioutput_train_size) + logger.info( + f"Split data using multioutput_train_size={multioutput_train_size}: " + f"train size={len(X_train)}, val size={len(X_val)}" + ) + task.validate_data( self, self._state, diff --git a/test/automl/test_regression.py b/test/automl/test_regression.py index c451618246..f3d6900fc4 100644 --- a/test/automl/test_regression.py +++ b/test/automl/test_regression.py @@ -244,6 +244,41 @@ def test_multioutput(): print(model.predict(X_test)) +def test_multioutput_train_size(): + """Test multioutput_train_size parameter for manual validation set specification.""" + from sklearn.multioutput import MultiOutputRegressor + + # create multi-output regression data + X, y = make_regression(n_samples=100, n_features=10, n_targets=3, random_state=42) + + # Concatenate what would be training and validation data + # Simulate having 70 samples for training and 30 for validation + train_size = 70 + + # train the model using multioutput_train_size + model = MultiOutputRegressor( + AutoML(task="regression", time_budget=1, eval_method="holdout", multioutput_train_size=train_size) + ) + model.fit(X, y) + + # predict on a subset + predictions = model.predict(X[:10]) + + # Verify predictions have correct shape + assert predictions.shape == (10, 3), f"Expected shape (10, 3), got {predictions.shape}" + print(f"Predictions shape: {predictions.shape}") + print(f"Sample predictions:\n{predictions[:3]}") + + # Test with float train_size (proportion) + model2 = MultiOutputRegressor( + AutoML(task="regression", time_budget=1, eval_method="holdout", multioutput_train_size=0.7) + ) + model2.fit(X, y) + predictions2 = model2.predict(X[:10]) + assert predictions2.shape == (10, 3), f"Expected shape (10, 3), got {predictions2.shape}" + print(f"Model with float train_size also works correctly") + + @pytest.mark.parametrize( "estimator", [ From cd097f3d199bb42fe7d9ab395c6ee81d43c3d3f8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 21 Jan 2026 06:42:59 +0000 Subject: [PATCH 3/4] Add validation and warnings for multioutput_train_size parameter Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com> --- flaml/automl/automl.py | 49 +++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py index e247304c74..94ab22c466 100644 --- a/flaml/automl/automl.py +++ b/flaml/automl/automl.py @@ -1738,8 +1738,30 @@ def _train_val_split(self, X, y, train_size): Returns: X_train, X_val, y_train, y_val """ + n_samples = len(X) + + # Validate train_size parameter if isinstance(train_size, float): - train_size = int(len(X) * train_size) + if not 0.0 < train_size < 1.0: + raise ValueError( + f"train_size as a float must be between 0.0 and 1.0, got {train_size}" + ) + train_size = int(n_samples * train_size) + elif isinstance(train_size, int): + if train_size <= 0 or train_size >= n_samples: + raise ValueError( + f"train_size as an integer must be between 1 and {n_samples - 1}, got {train_size}" + ) + else: + raise TypeError( + f"train_size must be int or float, got {type(train_size).__name__}" + ) + + # Check we have at least one sample for validation + if train_size >= n_samples: + raise ValueError( + f"train_size ({train_size}) must be less than the number of samples ({n_samples})" + ) X_train = X[:train_size] X_val = X[train_size:] @@ -2351,13 +2373,24 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds): multioutput_train_size = ( self._settings.get("multioutput_train_size") if multioutput_train_size is None else multioutput_train_size ) - if multioutput_train_size is not None and X_val is None and y_val is None: - # Split the concatenated training data into train and validation sets - X_train, X_val, y_train, y_val = self._train_val_split(X_train, y_train, multioutput_train_size) - logger.info( - f"Split data using multioutput_train_size={multioutput_train_size}: " - f"train size={len(X_train)}, val size={len(X_val)}" - ) + if multioutput_train_size is not None: + if X_val is None and y_val is None: + # Warn if not using holdout evaluation + if eval_method not in ["auto", "holdout", None]: + logger.warning( + f"multioutput_train_size is intended for use with 'holdout' evaluation method, " + f"but eval_method={eval_method}. The split may be overridden during data preparation." + ) + # Split the concatenated training data into train and validation sets + X_train, X_val, y_train, y_val = self._train_val_split(X_train, y_train, multioutput_train_size) + logger.info( + f"Split data using multioutput_train_size={multioutput_train_size}: " + f"train size={len(X_train)}, val size={len(X_val)}" + ) + else: + logger.warning( + "multioutput_train_size is ignored because X_val and y_val are already provided." + ) task.validate_data( self, From 2c336d79f954d0613d89dcac937df07d47fa1d3d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 21 Jan 2026 07:08:22 +0000 Subject: [PATCH 4/4] Run pre-commit hooks to fix formatting issues Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com> --- flaml/automl/automl.py | 32 +++++++++++--------------------- test/automl/test_regression.py | 4 ++-- 2 files changed, 13 insertions(+), 23 deletions(-) diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py index 44f449004b..0aaf7f8e5a 100644 --- a/flaml/automl/automl.py +++ b/flaml/automl/automl.py @@ -1783,35 +1783,27 @@ def _train_val_split(self, X, y, train_size): X_train, X_val, y_train, y_val """ n_samples = len(X) - + # Validate train_size parameter if isinstance(train_size, float): if not 0.0 < train_size < 1.0: - raise ValueError( - f"train_size as a float must be between 0.0 and 1.0, got {train_size}" - ) + raise ValueError(f"train_size as a float must be between 0.0 and 1.0, got {train_size}") train_size = int(n_samples * train_size) elif isinstance(train_size, int): if train_size <= 0 or train_size >= n_samples: - raise ValueError( - f"train_size as an integer must be between 1 and {n_samples - 1}, got {train_size}" - ) + raise ValueError(f"train_size as an integer must be between 1 and {n_samples - 1}, got {train_size}") else: - raise TypeError( - f"train_size must be int or float, got {type(train_size).__name__}" - ) - + raise TypeError(f"train_size must be int or float, got {type(train_size).__name__}") + # Check we have at least one sample for validation if train_size >= n_samples: - raise ValueError( - f"train_size ({train_size}) must be less than the number of samples ({n_samples})" - ) - + raise ValueError(f"train_size ({train_size}) must be less than the number of samples ({n_samples})") + X_train = X[:train_size] X_val = X[train_size:] y_train = y[:train_size] y_val = y[train_size:] - + return X_train, X_val, y_train, y_val def _prepare_data(self, eval_method, split_ratio, n_splits): @@ -2412,7 +2404,7 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds): self.mlflow_integration.only_history = True except KeyError: logger.info("Not in Fabric, Skipped") - + # Handle multioutput_train_size parameter multioutput_train_size = ( self._settings.get("multioutput_train_size") if multioutput_train_size is None else multioutput_train_size @@ -2432,10 +2424,8 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds): f"train size={len(X_train)}, val size={len(X_val)}" ) else: - logger.warning( - "multioutput_train_size is ignored because X_val and y_val are already provided." - ) - + logger.warning("multioutput_train_size is ignored because X_val and y_val are already provided.") + task.validate_data( self, self._state, diff --git a/test/automl/test_regression.py b/test/automl/test_regression.py index f3d6900fc4..3f05c84015 100644 --- a/test/automl/test_regression.py +++ b/test/automl/test_regression.py @@ -263,7 +263,7 @@ def test_multioutput_train_size(): # predict on a subset predictions = model.predict(X[:10]) - + # Verify predictions have correct shape assert predictions.shape == (10, 3), f"Expected shape (10, 3), got {predictions.shape}" print(f"Predictions shape: {predictions.shape}") @@ -276,7 +276,7 @@ def test_multioutput_train_size(): model2.fit(X, y) predictions2 = model2.predict(X[:10]) assert predictions2.shape == (10, 3), f"Expected shape (10, 3), got {predictions2.shape}" - print(f"Model with float train_size also works correctly") + print("Model with float train_size also works correctly") @pytest.mark.parametrize(