Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions sklearn/linear_model/huber.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@

from ..base import BaseEstimator, RegressorMixin
from .base import LinearModel
from ..utils import check_X_y
from ..utils import check_X_y, check_array
from ..utils import check_consistent_length
from ..utils import axis0_safe_slice
from ..utils.extmath import safe_sparse_dot
from ..utils.validation import FLOAT_DTYPES


def _huber_loss_and_gradient(w, X, y, epsilon, alpha, sample_weight=None):
Expand Down Expand Up @@ -251,12 +252,13 @@ def fit(self, X, y, sample_weight=None):
self : object
"""
X, y = check_X_y(
X, y, copy=False, accept_sparse=['csr'], y_numeric=True)
X, y, copy=False, accept_sparse=['csr'], y_numeric=True, dtype=FLOAT_DTYPES)
if sample_weight is not None:
sample_weight = np.array(sample_weight)
sample_weight = check_array(
sample_weight, ensure_2d=False, dtype=FLOAT_DTYPES)
check_consistent_length(y, sample_weight)
else:
sample_weight = np.ones_like(y)
sample_weight = np.ones_like(y, dtype=np.float64)

if self.epsilon < 1.0:
raise ValueError(
Expand Down
42 changes: 42 additions & 0 deletions sklearn/linear_model/tests/test_huber.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# License: BSD 3 clause

import numpy as np
from numpy.testing import assert_allclose
from scipy import optimize, sparse
import pytest

Expand Down Expand Up @@ -199,3 +200,44 @@ def test_huber_better_r2_score():

# The huber model should also fit poorly on the outliers.
assert_greater(ridge_outlier_score, huber_outlier_score)



def test_huber_bool_dense_X_equivalence():
    """Fitting on a boolean design matrix must match fitting on float64.

    Non-regression test: HuberRegressor.fit converts X to a float dtype,
    so a bool X and its float64 copy should yield identical models.
    """
    rng = np.random.RandomState(0)
    X_bool = rng.rand(30, 5) > 0.5
    y = rng.randn(30)

    est_bool = HuberRegressor().fit(X_bool, y)
    est_float = HuberRegressor().fit(X_bool.astype(np.float64), y)

    # Compare every fitted attribute of the two estimators.
    tol = dict(rtol=1e-7, atol=1e-7)
    assert_allclose(est_bool.coef_, est_float.coef_, **tol)
    assert_allclose(est_bool.intercept_, est_float.intercept_, **tol)
    assert_allclose(est_bool.scale_, est_float.scale_, **tol)
    np.testing.assert_array_equal(est_bool.outliers_, est_float.outliers_)


def test_huber_bool_sample_weight():
    """Boolean sample weights must behave like the equivalent 0/1 floats.

    Non-regression test: sample_weight is validated/cast to a float dtype
    in HuberRegressor.fit, so bool and float64 weights give the same model.
    """
    rng = np.random.RandomState(0)
    X = rng.randn(40, 4)
    y = rng.randn(40)
    weights = rng.rand(40) > 0.3
    # Force both weight values to be present regardless of the RNG draw.
    weights[0] = True
    weights[1] = False

    est_bool = HuberRegressor().fit(X, y, sample_weight=weights)
    est_float = HuberRegressor().fit(
        X, y, sample_weight=weights.astype(np.float64))

    # Compare every fitted attribute of the two estimators.
    tol = dict(rtol=1e-7, atol=1e-7)
    assert_allclose(est_bool.coef_, est_float.coef_, **tol)
    assert_allclose(est_bool.intercept_, est_float.intercept_, **tol)
    assert_allclose(est_bool.scale_, est_float.scale_, **tol)
    np.testing.assert_array_equal(est_bool.outliers_, est_float.outliers_)