Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions flaml/automl/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,11 @@ def custom_metric(
}
```
mlflow_logging: boolean, default=True | Whether to log the training results to mlflow. Not valid if mlflow is not installed.
multioutput_train_size: int, float or None, default=None | For multi-output regression tasks with
"holdout" evaluation, allows manual specification of validation set by concatenating training and
validation data and specifying where to split. If int, represents the number of samples in the
training set. If float (between 0.0 and 1.0), represents the proportion of the dataset to include
in the training set. If None, no split is performed. Only used when X_val and y_val are not provided.

"""
if ERROR:
Expand Down Expand Up @@ -419,6 +424,7 @@ def custom_metric(
settings["custom_hp"] = settings.get("custom_hp", {})
settings["skip_transform"] = settings.get("skip_transform", False)
settings["mlflow_logging"] = settings.get("mlflow_logging", True)
settings["multioutput_train_size"] = settings.get("multioutput_train_size", None)

self._estimator_type = "classifier" if settings["task"] in CLASSIFICATION else "regressor"
self.best_run_id = None
Expand Down Expand Up @@ -1764,6 +1770,42 @@ def metric_constraints(self) -> list:
"""
return self._metric_constraints

def _train_val_split(self, X, y, train_size):
    """Split concatenated training and validation data at a fixed point.

    Args:
        X: Combined training and validation features (sliceable, with len()).
        y: Combined training and validation labels, aligned with X.
        train_size: int or float - if int, number of samples for the training
            set; if float (strictly between 0.0 and 1.0), proportion of
            samples for the training set.

    Returns:
        X_train, X_val, y_train, y_val

    Raises:
        TypeError: If train_size is not an int or float (bool is rejected).
        ValueError: If the resulting split would leave the training or
            validation set empty.
    """
    n_samples = len(X)

    # Validate train_size and normalize it to an absolute sample count.
    # NOTE: bool is a subclass of int, so reject it explicitly up front.
    if isinstance(train_size, bool):
        raise TypeError(f"train_size must be int or float, got {type(train_size).__name__}")
    if isinstance(train_size, float):
        if not 0.0 < train_size < 1.0:
            raise ValueError(f"train_size as a float must be between 0.0 and 1.0, got {train_size}")
        train_size = int(n_samples * train_size)
    elif not isinstance(train_size, int):
        raise TypeError(f"train_size must be int or float, got {type(train_size).__name__}")

    # Check AFTER the float conversion: a small fraction can truncate to 0
    # (e.g. int(10 * 0.05) == 0), which would otherwise yield an empty
    # training set. Both sides of the split must get at least one sample.
    if train_size <= 0 or train_size >= n_samples:
        raise ValueError(f"train_size as an integer must be between 1 and {n_samples - 1}, got {train_size}")

    X_train = X[:train_size]
    X_val = X[train_size:]
    y_train = y[:train_size]
    y_val = y[train_size:]

    return X_train, X_val, y_train, y_val

def _prepare_data(self, eval_method, split_ratio, n_splits):
self._state.task.prepare_data(
self._state,
Expand Down Expand Up @@ -1837,6 +1879,7 @@ def fit(
mlflow_logging=None,
fit_kwargs_by_estimator=None,
mlflow_exp_name=None,
multioutput_train_size=None,
**fit_kwargs,
):
"""Find a model for a given task.
Expand Down Expand Up @@ -2154,6 +2197,11 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds):
}
}
```
multioutput_train_size: int, float or None, default=None | For multi-output regression tasks with
"holdout" evaluation, allows manual specification of validation set by concatenating training and
validation data and specifying where to split. If int, represents the number of samples in the
training set. If float (between 0.0 and 1.0), represents the proportion of the dataset to include
in the training set. If None, no split is performed. Only used when X_val and y_val are not provided.

**fit_kwargs: Other key word arguments to pass to fit() function of
the searched learners, such as sample_weight. Below are a few examples of
Expand Down Expand Up @@ -2356,6 +2404,28 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds):
self.mlflow_integration.only_history = True
except KeyError:
logger.info("Not in Fabric, Skipped")

# Handle multioutput_train_size parameter
multioutput_train_size = (
self._settings.get("multioutput_train_size") if multioutput_train_size is None else multioutput_train_size
)
if multioutput_train_size is not None:
if X_val is None and y_val is None:
# Warn if not using holdout evaluation
if eval_method not in ["auto", "holdout", None]:
logger.warning(
f"multioutput_train_size is intended for use with 'holdout' evaluation method, "
f"but eval_method={eval_method}. The split may be overridden during data preparation."
)
# Split the concatenated training data into train and validation sets
X_train, X_val, y_train, y_val = self._train_val_split(X_train, y_train, multioutput_train_size)
logger.info(
f"Split data using multioutput_train_size={multioutput_train_size}: "
f"train size={len(X_train)}, val size={len(X_val)}"
)
else:
logger.warning("multioutput_train_size is ignored because X_val and y_val are already provided.")

task.validate_data(
self,
self._state,
Expand Down
35 changes: 35 additions & 0 deletions test/automl/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,41 @@ def test_multioutput():
print(model.predict(X_test))


def test_multioutput_train_size():
    """Test multioutput_train_size parameter for manual validation set specification."""
    from sklearn.multioutput import MultiOutputRegressor

    # Build multi-output regression data: 100 rows, 10 features, 3 targets.
    X, y = make_regression(n_samples=100, n_features=10, n_targets=3, random_state=42)

    # Treat the first 70 rows as training data and the remaining 30 as validation.
    train_size = 70

    # Absolute sample count: AutoML splits the concatenated data itself.
    automl = AutoML(task="regression", time_budget=1, eval_method="holdout", multioutput_train_size=train_size)
    wrapper = MultiOutputRegressor(automl)
    wrapper.fit(X, y)

    predictions = wrapper.predict(X[:10])

    # One prediction row per input sample, one column per target.
    assert predictions.shape == (10, 3), f"Expected shape (10, 3), got {predictions.shape}"
    print(f"Predictions shape: {predictions.shape}")
    print(f"Sample predictions:\n{predictions[:3]}")

    # Fractional form: 0.7 of 100 rows is the same 70/30 split.
    automl_frac = AutoML(task="regression", time_budget=1, eval_method="holdout", multioutput_train_size=0.7)
    wrapper_frac = MultiOutputRegressor(automl_frac)
    wrapper_frac.fit(X, y)
    predictions2 = wrapper_frac.predict(X[:10])
    assert predictions2.shape == (10, 3), f"Expected shape (10, 3), got {predictions2.shape}"
    print("Model with float train_size also works correctly")


@pytest.mark.parametrize(
"estimator",
[
Expand Down
Loading