Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 93 additions & 46 deletions sklearn/linear_model/logistic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2003,6 +2003,7 @@ def fit(self, X, y, sample_weight=None):

multi_class = _check_multi_class(self.multi_class, solver,
len(classes))
self.multi_class = multi_class

if solver in ['sag', 'saga']:
max_squared_sum = row_norms(X, squared=True).max()
Expand Down Expand Up @@ -2081,74 +2082,115 @@ def fit(self, X, y, sample_weight=None):
# - n_iter is of shape
# (n_classes, n_folds, n_Cs . n_l1_ratios) or
# (1, n_folds, n_Cs . n_l1_ratios)
coefs_paths, Cs, scores, n_iter_ = zip(*fold_coefs_)
self.Cs_ = Cs[0]
coefs_paths_blocks, Cs_blocks, scores_blocks, n_iter_blocks = zip(
*fold_coefs_)
coefs_paths = np.stack(coefs_paths_blocks, axis=0)
Cs_blocks = np.stack(Cs_blocks, axis=0)
scores = np.stack(scores_blocks, axis=0)
n_iter_ = np.stack(n_iter_blocks, axis=0)

self.Cs_ = Cs_blocks[0]
if not np.allclose(Cs_blocks, self.Cs_):
raise ValueError(
"Inconsistent regularization grids across folds.")

n_folds = len(folds)
n_l1 = len(l1_ratios_)
n_cs = self.Cs_.size
n_classes_total = len(classes)
expected_blocks = len(iter_encoded_labels) * n_folds * n_l1

if (coefs_paths.shape[0] != expected_blocks or
scores.shape[0] != expected_blocks or
n_iter_.shape[0] != expected_blocks):
raise ValueError(
"Unexpected internal LogisticRegressionCV shapes: "
"expected %d blocks, got coefs=%d, scores=%d, n_iter=%d"
% (expected_blocks, coefs_paths.shape[0],
scores.shape[0], n_iter_.shape[0]))

if multi_class == 'multinomial':
coefs_paths = np.reshape(
coefs_paths,
(len(folds), len(l1_ratios_) * len(self.Cs_), n_classes, -1)
)
# equiv to coefs_paths = np.moveaxis(coefs_paths, (0, 1, 2, 3),
# (1, 2, 0, 3))
coefs_paths = np.swapaxes(coefs_paths, 0, 1)
coefs_paths = np.swapaxes(coefs_paths, 0, 2)
self.n_iter_ = np.reshape(
n_iter_,
(1, len(folds), len(self.Cs_) * len(l1_ratios_))
)
# repeat same scores across all classes
scores = np.tile(scores, (n_classes, 1, 1))
coefs_paths = coefs_paths.reshape(
n_folds, n_l1, n_cs, n_classes_total, -1)
coefs_paths = np.moveaxis(coefs_paths, 3, 0)
coefs_paths = coefs_paths.reshape(
n_classes_total, n_folds, n_l1 * n_cs, -1)

scores = scores.reshape(n_folds, n_l1, n_cs)
scores = scores.reshape(1, n_folds, n_l1 * n_cs)
scores = np.tile(scores, (n_classes_total, 1, 1))

n_iter_ = n_iter_.reshape(1, n_folds, n_l1 * n_cs)
else:
coefs_paths = np.reshape(
coefs_paths,
(n_classes, len(folds), len(self.Cs_) * len(l1_ratios_),
-1)
)
self.n_iter_ = np.reshape(
n_iter_,
(n_classes, len(folds), len(self.Cs_) * len(l1_ratios_))
)
scores = np.reshape(scores, (n_classes, len(folds), -1))
self.scores_ = dict(zip(classes, scores))
self.coefs_paths_ = dict(zip(classes, coefs_paths))
n_targets = len(iter_encoded_labels)
coefs_paths = coefs_paths.reshape(
n_targets, n_folds, n_l1, n_cs, -1)
coefs_paths = coefs_paths.reshape(
n_targets, n_folds, n_l1 * n_cs, -1)

scores = scores.reshape(n_targets, n_folds, n_l1, n_cs)
scores = scores.reshape(n_targets, n_folds, n_l1 * n_cs)

n_iter_ = n_iter_.reshape(n_targets, n_folds, n_l1 * n_cs)

if coefs_paths.ndim != 4:
raise ValueError(
"LogisticRegressionCV expected 4D coefficient paths but "
"got %d dimensions" % coefs_paths.ndim)

scores_per_class = scores
coefs_paths_per_class = coefs_paths

if self.penalty == 'elasticnet':
l1_ratios_array = np.asarray(l1_ratios_, dtype=float)
else:
l1_ratios_array = None

self.scores_ = dict(zip(classes, scores_per_class))
self.coefs_paths_ = dict(zip(classes, coefs_paths_per_class))
self.n_iter_ = n_iter_

self.C_ = list()
self.l1_ratio_ = list()
self.coef_ = np.empty((n_classes, X.shape[1]))
self.intercept_ = np.zeros(n_classes)
fold_indices = np.arange(len(folds))
for index, (cls, encoded_label) in enumerate(
zip(iter_classes, iter_encoded_labels)):

if multi_class == 'ovr':
scores = self.scores_[cls]
coefs_paths = self.coefs_paths_[cls]
class_scores = self.scores_[cls]
class_coefs_paths = self.coefs_paths_[cls]
else:
# For multinomial, all scores are the same across classes
scores = scores[0]
# coefs_paths will keep its original shape because
# logistic_regression_path expects it this way
# For multinomial, reuse the shared structures across classes
class_scores = scores_per_class[0]
class_coefs_paths = coefs_paths_per_class

if self.refit:
# best_index is between 0 and (n_Cs . n_l1_ratios - 1)
# for example, with n_cs=2 and n_l1_ratios=3
# the layout of scores is
# [c1, c2, c1, c2, c1, c2]
# l1_1 , l1_2 , l1_3
best_index = scores.sum(axis=0).argmax()
best_index = class_scores.sum(axis=0).argmax()

best_index_C = best_index % len(self.Cs_)
C_ = self.Cs_[best_index_C]
self.C_.append(C_)

best_index_l1 = best_index // len(self.Cs_)
l1_ratio_ = l1_ratios_[best_index_l1]
if self.penalty == 'elasticnet':
l1_ratio_ = float(l1_ratios_array[best_index_l1])
else:
l1_ratio_ = None
self.l1_ratio_.append(l1_ratio_)

if multi_class == 'multinomial':
coef_init = np.mean(coefs_paths[:, :, best_index, :],
axis=1)
coef_init = np.mean(
class_coefs_paths[:, :, best_index, :], axis=1)
else:
coef_init = np.mean(coefs_paths[:, best_index, :], axis=0)
coef_init = np.mean(
class_coefs_paths[:, best_index, :], axis=0)

# Note that y is label encoded and hence pos_class must be
# the encoded label / None (for 'multinomial')
Expand All @@ -2169,19 +2211,24 @@ def fit(self, X, y, sample_weight=None):
else:
# Take the best scores across every fold and the average of
# all coefficients corresponding to the best scores.
best_indices = np.argmax(scores, axis=1)
if self.multi_class == 'ovr':
w = np.mean([coefs_paths[i, best_indices[i], :]
for i in range(len(folds))], axis=0)
best_indices = np.argmax(class_scores, axis=1)
if multi_class == 'ovr':
w = class_coefs_paths[fold_indices, best_indices, :]
w = w.mean(axis=0)
else:
w = np.mean([coefs_paths[:, i, best_indices[i], :]
for i in range(len(folds))], axis=0)
w = class_coefs_paths[:, fold_indices, best_indices, :]
w = w.mean(axis=1)

best_indices_C = best_indices % len(self.Cs_)
self.C_.append(np.mean(self.Cs_[best_indices_C]))

best_indices_l1 = best_indices // len(self.Cs_)
self.l1_ratio_.append(np.mean(l1_ratios_[best_indices_l1]))
if self.penalty == 'elasticnet':
mean_l1_ratio = float(
np.mean(l1_ratios_array[best_indices_l1]))
else:
mean_l1_ratio = None
self.l1_ratio_.append(mean_l1_ratio)

if multi_class == 'multinomial':
self.C_ = np.tile(self.C_, n_classes)
Expand Down
92 changes: 92 additions & 0 deletions sklearn/linear_model/tests/test_logistic.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,98 @@ def test_ovr_multinomial_iris():
assert_equal(scores.shape, (3, n_cv, 10))


@pytest.mark.parametrize(
    "solver, penalty, l1_ratios",
    [
        ("saga", "elasticnet", [0.3, 0.7]),
        ("liblinear", "l2", None),
    ],
)
def test_logistic_regression_cv_refit_false_binary_shapes(
        solver, penalty, l1_ratios):
    """Check the shapes of the per-class CV artefacts of
    LogisticRegressionCV with refit=False on a binary problem.

    The elasticnet case must expose an extra l1_ratio axis in both
    ``coefs_paths_`` and ``scores_``; the plain l2 case must not.
    """
    X, y = make_classification(
        n_samples=80, n_features=6, n_informative=5,
        n_redundant=0, n_classes=2, random_state=0)

    params = dict(
        solver=solver,
        penalty=penalty,
        Cs=[0.1, 1.0],
        cv=3,
        refit=False,
        tol=1e-3,
        # saga needs far more iterations than liblinear to get close
        # to convergence on this problem.
        max_iter=3000 if solver == 'saga' else 200,
        random_state=0,
    )
    if l1_ratios is not None:
        params["l1_ratios"] = l1_ratios

    clf = LogisticRegressionCV(**params)
    with ignore_warnings(category=ConvergenceWarning):
        clf.fit(X, y)

    # Binary problem: a single (coef, intercept) row.
    assert_equal(clf.coef_.shape, (1, X.shape[1]))
    assert_equal(clf.intercept_.shape, (1,))

    # Exactly one entry (the positive class) in the per-class dicts.
    assert_equal(len(clf.coefs_paths_), 1)
    coefs_path = next(iter(clf.coefs_paths_.values()))
    class_scores = next(iter(clf.scores_.values()))

    # Coefficient paths include the intercept column when fitted.
    expected_features = X.shape[1] + int(clf.fit_intercept)
    n_l1 = clf.l1_ratios_.size
    n_cs = clf.Cs_.size

    if clf.penalty == 'elasticnet':
        assert_equal(coefs_path.shape,
                     (params["cv"], n_cs, n_l1, expected_features))
        assert_equal(class_scores.shape,
                     (params["cv"], n_cs, n_l1))
    else:
        assert_equal(coefs_path.shape,
                     (params["cv"], n_cs, expected_features))
        assert_equal(class_scores.shape,
                     (params["cv"], n_cs))


def test_logistic_regression_cv_refit_false_multinomial():
    """Multinomial LogisticRegressionCV with refit=False: every class
    entry of ``coefs_paths_`` / ``scores_`` carries the full CV grid of
    shape (n_folds, n_Cs, n_l1_ratios[, n_features + intercept])."""
    X, y = make_classification(
        n_samples=120, n_features=5, n_informative=4,
        n_redundant=0, n_classes=3, random_state=0)

    clf = LogisticRegressionCV(
        solver='saga',
        penalty='elasticnet',
        l1_ratios=[0.2, 0.8],
        Cs=[0.1, 1.0],
        cv=3,
        refit=False,
        multi_class='multinomial',
        tol=1e-3,
        max_iter=4000,
        random_state=0,
    )

    with ignore_warnings(category=ConvergenceWarning):
        clf.fit(X, y)

    n_classes = len(clf.classes_)
    expected_features = X.shape[1] + int(clf.fit_intercept)

    # One coefficient/intercept row per class.
    assert_equal(clf.coef_.shape, (n_classes, X.shape[1]))
    assert_equal(clf.intercept_.shape, (n_classes,))

    n_cs = clf.Cs_.size
    n_l1 = clf.l1_ratios_.size
    for label, path in clf.coefs_paths_.items():
        fold_scores = clf.scores_[label]
        n_folds = fold_scores.shape[0]
        assert_equal(path.shape,
                     (n_folds, n_cs, n_l1, expected_features))
        assert_equal(fold_scores.shape,
                     (n_folds, n_cs, n_l1))


def test_logistic_regression_solvers():
X, y = make_classification(n_features=10, n_informative=5, random_state=0)

Expand Down