Skip to content

Commit

Permalink
feat: regularization strength for logistic classifier (#866)
Browse files Browse the repository at this point in the history
Closes #750 

### Summary of Changes

Added an optional, keyword-only constructor parameter `c: float = 1.0` (the regularization
strength, which must be greater than 0) to `LogisticClassifier` and passed it to the wrapped scikit-learn estimator as `C`.

<!-- Please provide a summary of changes in this pull request, ensuring
all changes are explained. -->

---------

Co-authored-by: grefrathc <s23cgref@uni-bonn.de>
Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
Co-authored-by: Lars Reimann <mail@larsreimann.com>
  • Loading branch information
4 people authored Jun 29, 2024
1 parent 4ef078e commit 9f74e92
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 3 deletions.
30 changes: 27 additions & 3 deletions src/safeds/ml/classical/classification/_logistic_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import TYPE_CHECKING

from safeds._utils import _get_random_seed, _structural_hash
from safeds._validation import _check_bounds, _OpenBound

from ._classifier import Classifier

Expand All @@ -11,31 +12,54 @@


class LogisticClassifier(Classifier):
    """
    Regularized logistic regression for classification.

    Parameters
    ----------
    c:
        The regularization strength. Lower values imply stronger regularization. Must be greater than 0.

    Raises
    ------
    OutOfBoundsError
        If `c` is less than or equal to 0.
    """

    # ------------------------------------------------------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------------------------------------------------------

    def __init__(self, *, c: float = 1.0) -> None:
        super().__init__()

        # Validation: the lower bound is open, so 0 itself is rejected as well as negative values.
        _check_bounds("c", c, lower_bound=_OpenBound(0))

        # Hyperparameters
        self._c: float = c

    def __hash__(self) -> int:
        # The hyperparameter is part of the model's identity: without it, classifiers that differ only in their
        # regularization strength would produce the same structural hash.
        return _structural_hash(
            super().__hash__(),
            self._c,
        )

    # ------------------------------------------------------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------------------------------------------------------

    @property
    def c(self) -> float:
        """The regularization strength. Lower values imply stronger regularization."""
        return self._c

    # ------------------------------------------------------------------------------------------------------------------
    # Template methods
    # ------------------------------------------------------------------------------------------------------------------

    def _clone(self) -> LogisticClassifier:
        """Create a new, independent classifier that has the same hyperparameters as this one."""
        return LogisticClassifier(c=self.c)

    def _get_sklearn_model(self) -> ClassifierMixin:
        """Create the scikit-learn estimator that this classifier wraps, configured with this instance's `c`."""
        # NOTE(review): imported inside the method, presumably to defer loading scikit-learn until a model is
        # actually needed - confirm against the project's import conventions.
        from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression

        return SklearnLogisticRegression(
            random_state=_get_random_seed(),
            n_jobs=-1,
            C=self.c,
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import pytest
from safeds.data.labeled.containers import TabularDataset
from safeds.data.tabular.containers import Table
from safeds.exceptions import OutOfBoundsError
from safeds.ml.classical.classification import LogisticClassifier


@pytest.fixture()
def training_set() -> TabularDataset:
    """Provide a minimal dataset with the feature column ``col2`` and the target column ``col1``."""
    columns = {
        "col1": [1, 2, 3, 4],
        "col2": [1, 2, 3, 4],
    }
    return Table(columns).to_tabular_dataset(target_name="col1")


class TestC:
    """Tests for the ``c`` (regularization strength) hyperparameter of ``LogisticClassifier``."""

    def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None:
        # Fitting must carry the hyperparameter over to the returned model.
        classifier = LogisticClassifier(c=2)
        fitted = classifier.fit(training_set)
        assert fitted.c == 2

    def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None:
        # The wrapped scikit-learn estimator exposes the value as ``C``.
        classifier = LogisticClassifier(c=2)
        fitted = classifier.fit(training_set)
        assert fitted._wrapped_model is not None
        assert fitted._wrapped_model.C == 2

    def test_clone(self, training_set: TabularDataset) -> None:
        # A clone must be a LogisticClassifier with the same regularization strength.
        classifier = LogisticClassifier(c=2)
        fitted = classifier.fit(training_set)
        clone = fitted._clone()
        assert isinstance(clone, LogisticClassifier)
        assert clone.c == fitted.c

    @pytest.mark.parametrize("c", [-1.0, 0.0], ids=["minus_one", "zero"])
    def test_should_raise_if_less_than_or_equal_to_0(self, c: float) -> None:
        # Both a negative value and the excluded boundary 0 must be rejected at construction time.
        with pytest.raises(OutOfBoundsError):
            LogisticClassifier(c=c)

0 comments on commit 9f74e92

Please sign in to comment.