Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 20 additions & 14 deletions sklearn/feature_selection/_sequential.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
import warnings

from ._base import SelectorMixin
from ..base import BaseEstimator, MetaEstimatorMixin, clone
from ..base import BaseEstimator, MetaEstimatorMixin, clone, is_classifier
from ..utils._param_validation import HasMethods, Hidden, Interval, StrOptions
from ..utils._param_validation import RealNotInt
from ..utils._tags import _safe_tags
from ..utils.validation import check_is_fitted
from ..model_selection import cross_val_score
from ..model_selection import check_cv, cross_val_score
from ..metrics import get_scorer_names


Expand Down Expand Up @@ -79,19 +79,23 @@ class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin, BaseEstimator

If None, the estimator's score method is used.

cv : int, cross-validation generator or an iterable, default=None
Determines the cross-validation splitting strategy.
Possible inputs for cv are:
cv : int, cross-validation generator, or iterable, default=None
Determines the cross-validation splitting strategy. Possible inputs
for ``cv`` are:

- None, to use the default 5-fold cross validation,
- ``None``, to use the default 5-fold cross validation,
- integer, to specify the number of folds in a `(Stratified)KFold`,
- :term:`CV splitter`,
- An iterable yielding (train, test) splits as arrays of indices.
- an iterable yielding (train, test) splits as arrays of indices.

For integer/None inputs, if the estimator is a classifier and ``y`` is
either binary or multiclass, :class:`StratifiedKFold` is used. In all
other cases, :class:`KFold` is used. These splitters are instantiated
with `shuffle=False` so the splits will be the same across calls.
For integer/``None`` inputs, if the estimator is a classifier and ``y``
is either binary or multiclass, :class:`StratifiedKFold` is used. In
all other cases, :class:`KFold` is used. These splitters are
instantiated with ``shuffle=False`` so the splits will be the same
across calls.

Iterables, including generators, are materialized once for reuse. This
can increase memory consumption when the number of folds is large.

Refer to the :ref:`User Guide <cross_validation>` for the various
cross-validation strategies that can be used here.
Expand Down Expand Up @@ -273,9 +277,11 @@ def fit(self, X, y=None):

old_score = -np.inf
is_auto_select = self.tol is not None and self.n_features_to_select == "auto"
cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))

for _ in range(n_iterations):
new_feature_idx, new_score = self._get_best_new_feature_score(
cloned_estimator, X, y, current_mask
cloned_estimator, X, y, current_mask, cv
)
if is_auto_select and ((new_score - old_score) < self.tol):
break
Expand All @@ -291,7 +297,7 @@ def fit(self, X, y=None):

return self

def _get_best_new_feature_score(self, estimator, X, y, current_mask):
def _get_best_new_feature_score(self, estimator, X, y, current_mask, cv):
# Return the best new feature and its score to add to the current_mask,
# i.e. return the best new feature and its score to add (resp. remove)
# when doing forward selection (resp. backward selection).
Expand All @@ -309,7 +315,7 @@ def _get_best_new_feature_score(self, estimator, X, y, current_mask):
estimator,
X_new,
y,
cv=self.cv,
cv=cv,
scoring=self.scoring,
n_jobs=self.n_jobs,
).mean()
Expand Down
39 changes: 37 additions & 2 deletions sklearn/feature_selection/tests/test_sequential.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.datasets import make_regression, make_blobs
from sklearn.datasets import make_blobs, make_classification, make_regression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier


def test_bad_n_features_to_select():
Expand Down Expand Up @@ -314,3 +315,37 @@ def test_backward_neg_tol():

assert 0 < sfs.get_support().sum() < X.shape[1]
assert new_score < initial_score


def test_sfs_supports_iterable_cv_generator():
    """Non-reusable cv iterables (e.g. generators) must be accepted.

    SFS evaluates many candidate feature subsets, so a one-shot generator
    would be exhausted after the first evaluation; this checks the splits
    are materialized once and reused for every candidate.
    """
    X, y = make_classification(n_samples=40, n_features=8, random_state=0)

    # Two equal-sized groups -> LeaveOneGroupOut yields exactly two splits.
    half = y.size // 2
    groups = np.zeros_like(y, dtype=int)
    groups[half:] = 1

    splits = LeaveOneGroupOut().split(X, y, groups=groups)

    sfs = SequentialFeatureSelector(
        KNeighborsClassifier(n_neighbors=3),
        n_features_to_select=3,
        scoring="accuracy",
        cv=splits,
    )
    sfs.fit(X, y)

    assert sfs.get_support().sum() == 3


def test_sfs_baseline_cv_int_runs():
    """Sanity check: the standard integer-cv path still works end to end."""
    X, y = make_regression(n_samples=60, n_features=10, random_state=0)

    sfs = SequentialFeatureSelector(LinearRegression(), n_features_to_select=4, cv=5)
    sfs.fit(X, y)

    # Exactly the requested number of features is selected and transform
    # reduces the feature dimension accordingly.
    support = sfs.get_support()
    assert support.sum() == 4
    assert sfs.transform(X).shape[1] == 4