Skip to content

Commit

Permalink
[Feature] introduce scikit-learn class (#17)
Browse files Browse the repository at this point in the history
  • Loading branch information
RektPunk authored Sep 22, 2024
1 parent 2dc8a63 commit c9c22d6
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 18 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ repos:
- id: check-merge-conflict

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.5
rev: v0.6.7
hooks:
- id: ruff
args: [ --fix ]
Expand Down
3 changes: 2 additions & 1 deletion imlightgbm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# ruff: noqa
from imlightgbm.engine import cv, train
from imlightgbm.sklearn import ImbalancedLGBMClassifier

__version__ = "0.0.2"
__version__ = "0.0.3"
51 changes: 36 additions & 15 deletions imlightgbm/objective.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,36 +32,57 @@ def _sigmoid(x: np.ndarray) -> np.ndarray:
return 1 / (1 + np.exp(-x))


def sklearn_binary_focal_objective(
    y_true: np.ndarray, y_pred: np.ndarray, gamma: float
) -> tuple[np.ndarray, np.ndarray]:
    """Return grad, hess for binary focal objective.

    Args:
        y_true: Ground-truth labels. The expressions below are written for
            labels taking the values 0 and 1.
        y_pred: Raw scores; they are mapped to probabilities with ``_sigmoid``.
        gamma: Exponent applied to the focal modulating term.

    Returns:
        A ``(grad, hess)`` tuple computed element-wise from ``y_true`` and
        ``y_pred``.
    """
    pred_prob = _sigmoid(y_pred)
    # +1 where y_true == 0 and -1 where y_true == 1; computed once and reused.
    sign = (-1) ** y_true

    # Shared terms. With p = pred_prob and 0/1 labels:
    #   g2 is (1 - p) for y_true == 1 and p for y_true == 0,
    #   g4 is p for y_true == 1 and (1 - p) for y_true == 0.
    g1 = pred_prob * (1 - pred_prob)
    g2 = y_true + sign * pred_prob
    g3 = pred_prob + y_true - 1
    g4 = 1 - y_true - sign * pred_prob

    # gradient
    grad = gamma * g3 * _power(g2, gamma) * _log(g4) + sign * _power(
        g2, (gamma + 1)
    )

    # hess
    h1 = _power(g2, gamma) + gamma * sign * g3 * _power(g2, (gamma - 1))
    h2 = sign * g3 * _power(g2, gamma) / g4
    hess = ((h1 * _log(g4) - h2) * gamma + (gamma + 1) * _power(g2, gamma)) * g1
    return grad, hess


def sklearn_binary_weighted_objective(
    y_true: np.ndarray, y_pred: np.ndarray, alpha: float
) -> tuple[np.ndarray, np.ndarray]:
    """Return grad, hess for binary weighted objective.

    Each element is scaled by ``alpha ** y_true``, i.e. by ``alpha`` where the
    label is 1 and by 1 where the label is 0.

    Args:
        y_true: Ground-truth labels.
        y_pred: Raw scores; they are mapped to probabilities with ``_sigmoid``.
        alpha: Base of the per-sample weight ``alpha ** y_true``.

    Returns:
        A ``(grad, hess)`` tuple computed element-wise.
    """
    prob = _sigmoid(y_pred)
    weight = alpha**y_true
    residual = y_true - prob
    return -weight * residual, weight * prob * (1.0 - prob)


def binary_focal_objective(
    pred: np.ndarray, train_data: Dataset, gamma: float
) -> tuple[np.ndarray, np.ndarray]:
    """Return grad, hess for binary focal objective.

    Reads the labels from ``train_data`` and delegates the computation to
    ``sklearn_binary_focal_objective``.

    Args:
        pred: Raw scores, forwarded as ``y_pred``.
        train_data: Dataset whose ``get_label()`` supplies ``y_true``.
        gamma: Forwarded unchanged to the focal objective.

    Returns:
        The ``(grad, hess)`` tuple produced by the delegate.
    """
    return sklearn_binary_focal_objective(
        y_true=train_data.get_label(), y_pred=pred, gamma=gamma
    )


def binary_weighted_objective(
    pred: np.ndarray, train_data: Dataset, alpha: float
) -> tuple[np.ndarray, np.ndarray]:
    """Return grad, hess for binary weighted objective.

    Reads the labels from ``train_data`` and delegates the computation to
    ``sklearn_binary_weighted_objective`` so that both entry points share one
    implementation.

    Args:
        pred: Raw scores, forwarded as ``y_pred``.
        train_data: Dataset whose ``get_label()`` supplies ``y_true``.
        alpha: Forwarded unchanged to the weighted objective.

    Returns:
        The ``(grad, hess)`` tuple produced by the delegate.
    """
    label = train_data.get_label()
    grad, hess = sklearn_binary_weighted_objective(
        y_true=label, y_pred=pred, alpha=alpha
    )
    return grad, hess


Expand Down
71 changes: 71 additions & 0 deletions imlightgbm/sklearn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from typing import Callable, Literal

import numpy as np
from lightgbm import LGBMClassifier

from imlightgbm.objective import (
sklearn_binary_focal_objective,
sklearn_binary_weighted_objective,
)

_Objective = Literal["binary_focal", "binary_weighted"]


class ImbalancedLGBMClassifier(LGBMClassifier):
    """``LGBMClassifier`` preconfigured with a focal or weighted binary objective.

    The ``objective`` name is translated into a two-argument callable
    ``(y_true, y_pred) -> (grad, hess)`` and handed to ``LGBMClassifier``.
    ``gamma`` is only used by ``"binary_focal"`` and ``alpha`` only by
    ``"binary_weighted"``.

    The callables capture the ``alpha`` / ``gamma`` values given to the
    constructor, so reassigning ``self.alpha`` or ``self.gamma`` afterwards does
    not change the objective that was already built.
    """

    def __init__(
        self,
        objective: _Objective | Callable[..., tuple[np.ndarray, np.ndarray]],
        boosting_type: str = "gbdt",
        num_leaves: int = 31,
        max_depth: int = -1,
        learning_rate: float = 0.1,
        n_estimators: int = 100,
        subsample_for_bin: int = 200000,
        class_weight: dict | str | None = None,
        min_split_gain: float = 0.0,
        min_child_weight: float = 1e-3,
        min_child_samples: int = 20,
        subsample: float = 1.0,
        subsample_freq: int = 0,
        colsample_bytree: float = 1.0,
        reg_alpha: float = 0.0,
        reg_lambda: float = 0.0,
        random_state: int | np.random.RandomState | np.random.Generator | None = None,
        n_jobs: int | None = None,
        importance_type: str = "split",
        alpha: float = 0.25,
        gamma: float = 2.0,
    ) -> None:
        """Build the classifier.

        Args:
            objective: ``"binary_focal"`` or ``"binary_weighted"``. An
                already-built callable is also accepted and forwarded as is.
            alpha: Weight parameter of the ``"binary_weighted"`` objective.
            gamma: Focusing parameter of the ``"binary_focal"`` objective.

        All remaining arguments are forwarded unchanged to
        ``LGBMClassifier.__init__``.

        Raises:
            KeyError: If ``objective`` is neither callable nor one of the two
                supported names.
        """
        self.alpha = alpha
        self.gamma = gamma

        # Keep these as plain two-parameter functions: LightGBM's documented
        # custom-objective signatures start with (y_true, y_pred), and extra
        # visible parameters could be mistaken for weight/group arguments.
        objective_by_name: dict[
            str, Callable[[np.ndarray, np.ndarray], tuple[np.ndarray, np.ndarray]]
        ] = {
            "binary_focal": lambda y_true, y_pred: sklearn_binary_focal_objective(
                y_true, y_pred, gamma=gamma
            ),
            "binary_weighted": lambda y_true, y_pred: sklearn_binary_weighted_objective(
                y_true, y_pred, alpha=alpha
            ),
        }

        if callable(objective):
            # The parent stores the resolved callable under ``self.objective``.
            # Re-creating the estimator from its stored parameters therefore
            # passes that callable back in; forward it instead of failing the
            # name lookup.
            # NOTE(review): intended to keep sklearn.base.clone working — confirm.
            resolved_objective = objective
        else:
            resolved_objective = objective_by_name[objective]

        super().__init__(
            boosting_type=boosting_type,
            num_leaves=num_leaves,
            max_depth=max_depth,
            learning_rate=learning_rate,
            n_estimators=n_estimators,
            subsample_for_bin=subsample_for_bin,
            objective=resolved_objective,
            class_weight=class_weight,
            min_split_gain=min_split_gain,
            min_child_weight=min_child_weight,
            min_child_samples=min_child_samples,
            subsample=subsample,
            subsample_freq=subsample_freq,
            colsample_bytree=colsample_bytree,
            reg_alpha=reg_alpha,
            reg_lambda=reg_lambda,
            random_state=random_state,
            n_jobs=n_jobs,
            importance_type=importance_type,
        )
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "imlightgbm"
version = "0.0.2"
version = "0.0.3"
description = "LightGBM for label-imbalanced data with focal and weighted loss function"
authors = ["RektPunk <rektpunk@gmail.com>"]
license = "MIT"
Expand Down

0 comments on commit c9c22d6

Please sign in to comment.