diff --git a/sklearn/linear_model/huber.py b/sklearn/linear_model/huber.py
index 65c6864007eb2..9d0981dc85e2e 100644
--- a/sklearn/linear_model/huber.py
+++ b/sklearn/linear_model/huber.py
@@ -7,10 +7,11 @@
 
 from ..base import BaseEstimator, RegressorMixin
 from .base import LinearModel
-from ..utils import check_X_y
+from ..utils import check_X_y, check_array
 from ..utils import check_consistent_length
 from ..utils import axis0_safe_slice
 from ..utils.extmath import safe_sparse_dot
+from ..utils.validation import FLOAT_DTYPES
 
 
 def _huber_loss_and_gradient(w, X, y, epsilon, alpha, sample_weight=None):
@@ -251,12 +252,13 @@ def fit(self, X, y, sample_weight=None):
         self : object
         """
         X, y = check_X_y(
-            X, y, copy=False, accept_sparse=['csr'], y_numeric=True)
+            X, y, copy=False, accept_sparse=['csr'], y_numeric=True, dtype=FLOAT_DTYPES)
         if sample_weight is not None:
-            sample_weight = np.array(sample_weight)
+            sample_weight = check_array(
+                sample_weight, ensure_2d=False, dtype=FLOAT_DTYPES)
             check_consistent_length(y, sample_weight)
         else:
-            sample_weight = np.ones_like(y)
+            sample_weight = np.ones_like(y, dtype=np.float64)
 
         if self.epsilon < 1.0:
             raise ValueError(
diff --git a/sklearn/linear_model/tests/test_huber.py b/sklearn/linear_model/tests/test_huber.py
index 6a8b26133d5ac..6693ad349f04e 100644
--- a/sklearn/linear_model/tests/test_huber.py
+++ b/sklearn/linear_model/tests/test_huber.py
@@ -2,6 +2,7 @@
 # License: BSD 3 clause
 
 import numpy as np
+from numpy.testing import assert_allclose
 from scipy import optimize, sparse
 import pytest
 
@@ -199,3 +200,44 @@ def test_huber_better_r2_score():
 
     # The huber model should also fit poorly on the outliers.
     assert_greater(ridge_outlier_score, huber_outlier_score)
+
+
+
+def test_huber_bool_dense_X_equivalence():
+    rng = np.random.RandomState(0)
+    X_bool = rng.rand(30, 5) > 0.5
+    y = rng.randn(30)
+
+    huber_bool = HuberRegressor().fit(X_bool, y)
+    huber_float = HuberRegressor().fit(X_bool.astype(np.float64), y)
+
+    assert_allclose(huber_bool.coef_, huber_float.coef_, rtol=1e-7,
+                    atol=1e-7)
+    assert_allclose(huber_bool.intercept_, huber_float.intercept_,
+                    rtol=1e-7, atol=1e-7)
+    assert_allclose(huber_bool.scale_, huber_float.scale_,
+                    rtol=1e-7, atol=1e-7)
+    assert_array_equal(huber_bool.outliers_, huber_float.outliers_)
+
+
+def test_huber_bool_sample_weight():
+    rng = np.random.RandomState(0)
+    X = rng.randn(40, 4)
+    y = rng.randn(40)
+    sample_weight_bool = rng.rand(40) > 0.3
+    sample_weight_bool[0] = True
+    sample_weight_bool[1] = False
+
+    huber_bool = HuberRegressor().fit(X, y, sample_weight=sample_weight_bool)
+    huber_float = HuberRegressor().fit(
+        X, y, sample_weight=sample_weight_bool.astype(np.float64))
+
+    assert_allclose(huber_bool.coef_, huber_float.coef_, rtol=1e-7,
+                    atol=1e-7)
+    assert_allclose(huber_bool.intercept_, huber_float.intercept_,
+                    rtol=1e-7, atol=1e-7)
+    assert_allclose(huber_bool.scale_, huber_float.scale_,
+                    rtol=1e-7, atol=1e-7)
+    assert_array_equal(huber_bool.outliers_, huber_float.outliers_)
+
+