RektPunk · RektPunk · Sep 28, 2024 · Sep 27, 2024 · Sep 28, 2024 · Sep 28, 2024
diff --git a/examples/multiclass_engine.py b/examples/multiclass_engine.py
@@ -77,5 +77,6 @@
 # Evaluate models
 print("\nClassification Report for Standard:")
 print(classification_report(y_test, y_pred_standard_label))
+
 print("\nClassification Report for Imbalanced:")
 print(classification_report(y_test, y_pred_focal_label))
diff --git a/examples/multiclass_sklearn.py b/examples/multiclass_sklearn.py
@@ -0,0 +1,42 @@
+from sklearn.datasets import make_classification
+from sklearn.metrics import classification_report
+from sklearn.model_selection import train_test_split
+
+import imlightgbm as imlgb
+
+# Generate dataset
+X, y = make_classification(
+    n_samples=5000,
+    n_features=10,
+    n_classes=3,
+    n_informative=5,
+    weights=[0.05, 0.15, 0.8],
+    flip_y=0,
+    random_state=42,
+)
+
+# Split the data into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.2, random_state=42
+)
+
+# Initialize the ImbalancedLGBMClassifier using multiclass focal loss
+clf = imlgb.ImbalancedLGBMClassifier(
+    objective="multiclass_focal",  # multiclass_weighted
+    gamma=2.0,  # alpha with multiclass_weighted
+    num_class=3,
+    learning_rate=0.05,
+    num_leaves=31,
+)
+
+# Train the classifier on the training data
+clf.fit(X=X_train, y=y_train)
+
+# Make predictions on the test data
+y_pred_focal = clf.predict(X_test)
+
+
+# Evaluate the model performance using a classification report
+# Evaluate models
+print("\nClassification Report:")
+print(classification_report(y_test, y_pred_focal))
diff --git a/imlightgbm/__init__.py b/imlightgbm/__init__.py
@@ -2,4 +2,4 @@
 from imlightgbm.engine import cv, train
 from imlightgbm.sklearn import ImbalancedLGBMClassifier
 
-__version__ = "0.0.4"
+__version__ = "0.1.0"
diff --git a/imlightgbm/objective/core.py b/imlightgbm/objective/core.py
@@ -3,7 +3,7 @@
 from scipy.special import expit, softmax
 
 
-def _safe_power(num_base: np.ndarray, num_pow: float):
+def _safe_power(num_base: np.ndarray, num_pow: float) -> np.ndarray:
     """Safe power."""
     return np.sign(num_base) * (np.abs(num_base)) ** (num_pow)
 
@@ -54,21 +54,19 @@ def binary_focal_objective(
 ) -> tuple[np.ndarray, np.ndarray]:
     """Return grad, hess for binary focal objective for engine."""
     label = train_data.get_label()
-    grad, hess = sklearn_binary_focal_objective(
+    return sklearn_binary_focal_objective(
         y_true=label,
         y_pred=pred,
         gamma=gamma,
     )
-    return grad, hess
 
 
-def binary_weighted_objective(pred: np.ndarray, train_data: Dataset, alpha: float):
+def binary_weighted_objective(
+    pred: np.ndarray, train_data: Dataset, alpha: float
+) -> tuple[np.ndarray, np.ndarray]:
     """Return grad, hess for binary weighted objective for engine."""
     label = train_data.get_label()
-    grad, hess = sklearn_binary_weighted_objective(
-        y_true=label, y_pred=pred, alpha=alpha
-    )
-    return grad, hess
+    return sklearn_binary_weighted_objective(y_true=label, y_pred=pred, alpha=alpha)
 
 
 def sklearn_multiclass_focal_objective(
@@ -79,7 +77,7 @@ def sklearn_multiclass_focal_objective(
 ) -> tuple[np.ndarray, np.ndarray]:
     """Return grad, hess for multclass focal objective for sklearn API.."""
     pred_prob = softmax(y_pred, axis=1)
-    y_true_onehot = np.eye(num_class)[y_true]
+    y_true_onehot = np.eye(num_class)[y_true.astype(int)]
 
     # gradient
     g1 = pred_prob * (1 - pred_prob)
@@ -110,7 +108,7 @@ def sklearn_multiclass_weighted_objective(
 ) -> tuple[np.ndarray, np.ndarray]:
     """Return grad, hess for multclass weighted objective for sklearn API."""
     pred_prob = softmax(y_pred, axis=1)
-    y_true_onehot = np.eye(num_class)[y_true]
+    y_true_onehot = np.eye(num_class)[y_true.astype(int)]
     grad = -(alpha**y_true_onehot) * (y_true_onehot - pred_prob)
     hess = (alpha**y_true_onehot) * pred_prob * (1.0 - pred_prob)
     return grad, hess
@@ -123,28 +121,26 @@ def multiclass_focal_objective(
     num_class: int,
 ) -> tuple[np.ndarray, np.ndarray]:
     """Return grad, hess for multclass focal objective for engine."""
-    label = train_data.get_label().astype(int)
-    grad, hess = sklearn_multiclass_focal_objective(
+    label = train_data.get_label()
+    return sklearn_multiclass_focal_objective(
         y_true=label,
         y_pred=pred,
         gamma=gamma,
         num_class=num_class,
     )
-    return grad, hess
 
 
 def multiclass_weighted_objective(
     pred: np.ndarray,
     train_data: Dataset,
     alpha: float,
     num_class: int,
-) -> tuple[str, float, bool]:
+) -> tuple[np.ndarray, np.ndarray]:
     """Return grad, hess for multclass weighted objective for engine."""
-    label = train_data.get_label().astype(int)
-    grad, hess = sklearn_multiclass_weighted_objective(
+    label = train_data.get_label()
+    return sklearn_multiclass_weighted_objective(
         y_true=label,
         y_pred=pred,
         alpha=alpha,
         num_class=num_class,
     )
-    return grad, hess
diff --git a/imlightgbm/sklearn.py b/imlightgbm/sklearn.py
@@ -10,6 +10,8 @@
 from imlightgbm.objective.core import (
     sklearn_binary_focal_objective,
     sklearn_binary_weighted_objective,
+    sklearn_multiclass_focal_objective,
+    sklearn_multiclass_weighted_objective,
 )
 from imlightgbm.utils import validate_positive_number
 
@@ -44,6 +46,7 @@ def __init__(
         random_state: int | np.random.RandomState | np.random.Generator | None = None,
         n_jobs: int | None = None,
         importance_type: str = "split",
+        num_class: int | None = None,
     ) -> None:
         """Construct a gradient boosting model.
 
@@ -52,20 +55,42 @@ def __init__(
         objective : str
             Specify the learning objective. Options are 'binary_focal' and 'binary_weighted'.
         alpha: float
+            For 'binary_weighted' and 'multiclass_weighted' objectives
         gamma: float
-        Check http://lightgbm.readthedocs.io/en/latest/Parameters.html for more other parameters.
+            For 'binary_focal' and 'multiclass_focal' objectives
+        other parameters:
+            Check http://lightgbm.readthedocs.io/en/latest/Parameters.html for more details.
         """
         validate_positive_number(alpha)
         validate_positive_number(gamma)
 
         self.alpha = alpha
         self.gamma = gamma
+        self.num_class = num_class
         _objective = Objective.get(objective)
-        _OBJECTIVE_MAPPER: dict[Objective, _SklearnObjLike] = {
+        if _objective in {
+            Objective.multiclass_focal,
+            Objective.multiclass_weighted,
+        } and not isinstance(num_class, int):
+            raise ValueError("num_class must be provided")
+
+        _objective_mapper: dict[Objective, _SklearnObjLike] = {
             Objective.binary_focal: lambda y_true,
-            y_pred: sklearn_binary_focal_objective(y_true, y_pred, gamma=gamma),
+            y_pred: sklearn_binary_focal_objective(
+                y_true=y_true, y_pred=y_pred, gamma=gamma
+            ),
             Objective.binary_weighted: lambda y_true,
-            y_pred: sklearn_binary_weighted_objective(y_true, y_pred, alpha=alpha),
+            y_pred: sklearn_binary_weighted_objective(
+                y_true=y_true, y_pred=y_pred, alpha=alpha
+            ),
+            Objective.multiclass_focal: lambda y_true,
+            y_pred: sklearn_multiclass_focal_objective(
+                y_true=y_true, y_pred=y_pred, gamma=gamma, num_class=num_class
+            ),
+            Objective.multiclass_weighted: lambda y_true,
+            y_pred: sklearn_multiclass_weighted_objective(
+                y_true=y_true, y_pred=y_pred, alpha=alpha, num_class=num_class
+            ),
         }
         super().__init__(
             boosting_type=boosting_type,
@@ -74,7 +99,7 @@ def __init__(
             learning_rate=learning_rate,
             n_estimators=n_estimators,
             subsample_for_bin=subsample_for_bin,
-            objective=_OBJECTIVE_MAPPER[_objective],
+            objective=_objective_mapper[_objective],
             class_weight=class_weight,
             min_split_gain=min_split_gain,
             min_child_weight=min_child_weight,
@@ -102,7 +127,7 @@ def predict(
         **kwargs: Any,
     ) -> np.ndarray | spmatrix | list[spmatrix]:
         """Docstring is inherited from the LGBMClassifier."""
-        result = super().predict(
+        _predict = super().predict(
             X=X,
             raw_score=raw_score,
             start_iteration=start_iteration,
@@ -112,13 +137,18 @@ def predict(
             validate_features=validate_features,
             **kwargs,
         )
-        if raw_score or pred_leaf or pred_contrib:
-            return result
+        if (
+            raw_score
+            or pred_leaf
+            or pred_contrib
+            or isinstance(_predict, spmatrix | list)
+        ):
+            return _predict
 
-        if self._LGBMClassifier__is_multiclass:  # TODO: multiclass
-            class_index = np.argmax(result, axis=1)
-            return self._LGBMClassifier_le.inverse_transform(class_index)
+        if self._LGBMClassifier__is_multiclass:
+            class_index = np.argmax(_predict, axis=1)
+            return self._le.inverse_transform(class_index)
         else:
-            return expit(result)
+            return expit(_predict)
 
     predict.__doc__ = LGBMClassifier.predict.__doc__
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "imlightgbm"
-version = "0.0.4"
+version = "0.1.0"
 description = "LightGBM for label-imbalanced data with focal and weighted loss function"
 authors = ["RektPunk <rektpunk@gmail.com>"]
 license = "MIT"