From b2abe6d1e7a7a3f39ef3c5c2f516312b9e5b4cbe Mon Sep 17 00:00:00 2001
From: Rowan Stein <rowan.stein@agyn.io>
Date: Sat, 27 Dec 2025 21:02:20 +0000
Subject: [PATCH 1/4] fix(linear_model): allow bool X in huber

Coerce sample weights to FLOAT_DTYPES in HuberRegressor.fit and default
unweighted runs to float64 weights. Boolean feature matrices are not
coerced by this patch; that is done in PATCH 3/4 by passing
dtype=FLOAT_DTYPES to check_X_y. Together these changes prevent the
"TypeError: can't convert bool to float" failure reported for boolean
inputs. This patch also adds tests for dense bool X and bool
sample_weight to guard against future regressions.
---
 sklearn/linear_model/huber.py            |  8 ++-
 sklearn/linear_model/tests/test_huber.py | 79 ++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 3 deletions(-)

diff --git a/sklearn/linear_model/huber.py b/sklearn/linear_model/huber.py
index 65c6864007eb2..5a5c3f58bd0ab 100644
--- a/sklearn/linear_model/huber.py
+++ b/sklearn/linear_model/huber.py
@@ -7,10 +7,11 @@
 
 from ..base import BaseEstimator, RegressorMixin
 from .base import LinearModel
-from ..utils import check_X_y
+from ..utils import check_X_y, check_array
 from ..utils import check_consistent_length
 from ..utils import axis0_safe_slice
 from ..utils.extmath import safe_sparse_dot
+from ..utils.validation import FLOAT_DTYPES
 
 
 def _huber_loss_and_gradient(w, X, y, epsilon, alpha, sample_weight=None):
@@ -253,10 +254,11 @@ def fit(self, X, y, sample_weight=None):
         X, y = check_X_y(
             X, y, copy=False, accept_sparse=['csr'], y_numeric=True)
         if sample_weight is not None:
-            sample_weight = np.array(sample_weight)
+            sample_weight = check_array(
+                sample_weight, ensure_2d=False, dtype=FLOAT_DTYPES)
             check_consistent_length(y, sample_weight)
         else:
-            sample_weight = np.ones_like(y)
+            sample_weight = np.ones_like(y, dtype=np.float64)
 
         if self.epsilon < 1.0:
             raise ValueError(
diff --git a/sklearn/linear_model/tests/test_huber.py b/sklearn/linear_model/tests/test_huber.py
index 6a8b26133d5ac..ba58fba252270 100644
--- a/sklearn/linear_model/tests/test_huber.py
+++ b/sklearn/linear_model/tests/test_huber.py
@@ -2,6 +2,8 @@
 # License: BSD 3 clause
 
 import numpy as np
+from numpy.testing import assert_allclose
+from numpy.testing import assert_allclose
 from scipy import optimize, sparse
 import pytest
 
@@ -199,3 +201,80 @@ def test_huber_better_r2_score():
 
     # The huber model should also fit poorly on the outliers.
     assert_greater(ridge_outlier_score, huber_outlier_score)
+
+
+
+def test_huber_bool_dense_X_equivalence():
+    rng = np.random.RandomState(0)
+    X_bool = rng.rand(30, 5) > 0.5
+    y = rng.randn(30)
+
+    huber_bool = HuberRegressor().fit(X_bool, y)
+    huber_float = HuberRegressor().fit(X_bool.astype(np.float64), y)
+
+    assert_allclose(huber_bool.coef_, huber_float.coef_, rtol=1e-7,
+                    atol=1e-7)
+    assert_allclose(huber_bool.intercept_, huber_float.intercept_,
+                    rtol=1e-7, atol=1e-7)
+    assert_allclose(huber_bool.scale_, huber_float.scale_,
+                    rtol=1e-7, atol=1e-7)
+    assert_array_equal(huber_bool.outliers_, huber_float.outliers_)
+
+
+def test_huber_bool_sample_weight():
+    rng = np.random.RandomState(0)
+    X = rng.randn(40, 4)
+    y = rng.randn(40)
+    sample_weight_bool = rng.rand(40) > 0.3
+    sample_weight_bool[0] = True
+    sample_weight_bool[1] = False
+
+    huber_bool = HuberRegressor().fit(X, y, sample_weight=sample_weight_bool)
+    huber_float = HuberRegressor().fit(
+        X, y, sample_weight=sample_weight_bool.astype(np.float64))
+
+    assert_allclose(huber_bool.coef_, huber_float.coef_, rtol=1e-7,
+                    atol=1e-7)
+    assert_allclose(huber_bool.intercept_, huber_float.intercept_,
+                    rtol=1e-7, atol=1e-7)
+    assert_allclose(huber_bool.scale_, huber_float.scale_,
+                    rtol=1e-7, atol=1e-7)
+    assert_array_equal(huber_bool.outliers_, huber_float.outliers_)
+
+
+def test_huber_bool_dense_X_equivalence():
+    rng = np.random.RandomState(0)
+    X_bool = rng.rand(30, 5) > 0.5
+    y = rng.randn(30)
+
+    huber_bool = HuberRegressor().fit(X_bool, y)
+    huber_float = HuberRegressor().fit(X_bool.astype(np.float64), y)
+
+    assert_allclose(huber_bool.coef_, huber_float.coef_, rtol=1e-7,
+                    atol=1e-7)
+    assert_allclose(huber_bool.intercept_, huber_float.intercept_,
+                    rtol=1e-7, atol=1e-7)
+    assert_allclose(huber_bool.scale_, huber_float.scale_,
+                    rtol=1e-7, atol=1e-7)
+    assert_array_equal(huber_bool.outliers_, huber_float.outliers_)
+
+
+def test_huber_bool_sample_weight():
+    rng = np.random.RandomState(0)
+    X = rng.randn(40, 4)
+    y = rng.randn(40)
+    sample_weight_bool = rng.rand(40) > 0.3
+    sample_weight_bool[0] = True
+    sample_weight_bool[1] = False
+
+    huber_bool = HuberRegressor().fit(X, y, sample_weight=sample_weight_bool)
+    huber_float = HuberRegressor().fit(
+        X, y, sample_weight=sample_weight_bool.astype(np.float64))
+
+    assert_allclose(huber_bool.coef_, huber_float.coef_, rtol=1e-7,
+                    atol=1e-7)
+    assert_allclose(huber_bool.intercept_, huber_float.intercept_,
+                    rtol=1e-7, atol=1e-7)
+    assert_allclose(huber_bool.scale_, huber_float.scale_,
+                    rtol=1e-7, atol=1e-7)
+    assert_array_equal(huber_bool.outliers_, huber_float.outliers_)

From a0bef1a91caca0bbe9a074166cb8f5e1ffb8d3a2 Mon Sep 17 00:00:00 2001
From: Rowan Stein <rowan.stein@agyn.io>
Date: Sat, 27 Dec 2025 21:05:33 +0000
Subject: [PATCH 2/4] chore: address review comments

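- Coercing X to FLOAT_DTYPES via check_X_y in HuberRegressor.fit is done
  in PATCH 3/4; this patch only touches the tests
- Remove the duplicate assert_allclose import and the bodies of the
  duplicate test definitions (the leftover
  `def test_huber_bool_dense_X_equivalence():` header is removed in
  PATCH 4/4)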
---
 sklearn/linear_model/tests/test_huber.py | 36 ------------------------
 1 file changed, 36 deletions(-)

diff --git a/sklearn/linear_model/tests/test_huber.py b/sklearn/linear_model/tests/test_huber.py
index ba58fba252270..18be6974053da 100644
--- a/sklearn/linear_model/tests/test_huber.py
+++ b/sklearn/linear_model/tests/test_huber.py
@@ -3,7 +3,6 @@
 
 import numpy as np
 from numpy.testing import assert_allclose
-from numpy.testing import assert_allclose
 from scipy import optimize, sparse
 import pytest
 
@@ -243,38 +242,3 @@ def test_huber_bool_sample_weight():
 
 
 def test_huber_bool_dense_X_equivalence():
-    rng = np.random.RandomState(0)
-    X_bool = rng.rand(30, 5) > 0.5
-    y = rng.randn(30)
-
-    huber_bool = HuberRegressor().fit(X_bool, y)
-    huber_float = HuberRegressor().fit(X_bool.astype(np.float64), y)
-
-    assert_allclose(huber_bool.coef_, huber_float.coef_, rtol=1e-7,
-                    atol=1e-7)
-    assert_allclose(huber_bool.intercept_, huber_float.intercept_,
-                    rtol=1e-7, atol=1e-7)
-    assert_allclose(huber_bool.scale_, huber_float.scale_,
-                    rtol=1e-7, atol=1e-7)
-    assert_array_equal(huber_bool.outliers_, huber_float.outliers_)
-
-
-def test_huber_bool_sample_weight():
-    rng = np.random.RandomState(0)
-    X = rng.randn(40, 4)
-    y = rng.randn(40)
-    sample_weight_bool = rng.rand(40) > 0.3
-    sample_weight_bool[0] = True
-    sample_weight_bool[1] = False
-
-    huber_bool = HuberRegressor().fit(X, y, sample_weight=sample_weight_bool)
-    huber_float = HuberRegressor().fit(
-        X, y, sample_weight=sample_weight_bool.astype(np.float64))
-
-    assert_allclose(huber_bool.coef_, huber_float.coef_, rtol=1e-7,
-                    atol=1e-7)
-    assert_allclose(huber_bool.intercept_, huber_float.intercept_,
-                    rtol=1e-7, atol=1e-7)
-    assert_allclose(huber_bool.scale_, huber_float.scale_,
-                    rtol=1e-7, atol=1e-7)
-    assert_array_equal(huber_bool.outliers_, huber_float.outliers_)

From 2bfa8d3bd23906aef9486b8bf6f7ca3465b0993e Mon Sep 17 00:00:00 2001
From: Rowan Stein <rowan.stein@agyn.io>
Date: Sat, 27 Dec 2025 21:05:45 +0000
Subject: [PATCH 3/4] fix: coerce X dtype via check_X_y(dtype=FLOAT_DTYPES)

---
 sklearn/linear_model/huber.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/linear_model/huber.py b/sklearn/linear_model/huber.py
index 5a5c3f58bd0ab..9d0981dc85e2e 100644
--- a/sklearn/linear_model/huber.py
+++ b/sklearn/linear_model/huber.py
@@ -252,7 +252,7 @@ def fit(self, X, y, sample_weight=None):
         self : object
         """
         X, y = check_X_y(
-            X, y, copy=False, accept_sparse=['csr'], y_numeric=True)
+            X, y, copy=False, accept_sparse=['csr'], y_numeric=True, dtype=FLOAT_DTYPES)
         if sample_weight is not None:
             sample_weight = check_array(
                 sample_weight, ensure_2d=False, dtype=FLOAT_DTYPES)

From 7e272c49a4d47bbd4013d400218b499ffcdf1abf Mon Sep 17 00:00:00 2001
From: Rowan Stein <rowan.stein@agyn.io>
Date: Sat, 27 Dec 2025 21:07:25 +0000
Subject: [PATCH 4/4] fix(tests): remove stray duplicate test definition at EOF

---
 sklearn/linear_model/tests/test_huber.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/linear_model/tests/test_huber.py b/sklearn/linear_model/tests/test_huber.py
index 18be6974053da..6693ad349f04e 100644
--- a/sklearn/linear_model/tests/test_huber.py
+++ b/sklearn/linear_model/tests/test_huber.py
@@ -241,4 +241,3 @@ def test_huber_bool_sample_weight():
     assert_array_equal(huber_bool.outliers_, huber_float.outliers_)
 
 
-def test_huber_bool_dense_X_equivalence():