# OneVsRestLightGBMWithCustomizedLoss.py
import numpy as np
from joblib import Parallel, delayed
from sklearn.multiclass import _ConstantPredictor
from sklearn.preprocessing import LabelBinarizer
from scipy import special
import lightgbm as lgb


class OneVsRestLightGBMWithCustomizedLoss:
    """One-vs-rest multiclass wrapper around lgb.train that plugs a
    user-supplied loss object (init_score / lgb_obj / lgb_eval) into each
    binary problem. Written against the pre-4.0 lgb.train keyword API
    (fobj, feval, early_stopping_rounds, verbose_eval)."""

    def __init__(self, loss, n_jobs=3):
        self.loss = loss
        self.n_jobs = n_jobs

    def fit(self, X, y, **fit_params):
        # Binarize the labels: one sparse indicator column per class.
        self.label_binarizer_ = LabelBinarizer(sparse_output=True)
        Y = self.label_binarizer_.fit_transform(y)
        Y = Y.tocsc()
        self.classes_ = self.label_binarizer_.classes_
        columns = (col.toarray().ravel() for col in Y.T)

        if 'eval_set' in fit_params:
            # Use eval_set for early stopping: binarize the validation
            # labels the same way and fit the binary problems in parallel.
            X_val, y_val = fit_params['eval_set'][0]
            Y_val = self.label_binarizer_.transform(y_val)
            Y_val = Y_val.tocsc()
            columns_val = (col.toarray().ravel() for col in Y_val.T)
            self.results_ = Parallel(n_jobs=self.n_jobs)(
                delayed(self._fit_binary)(X, column, X_val, column_val, **fit_params)
                for i, (column, column_val) in enumerate(zip(columns, columns_val)))
        else:
            # No eval set available: fit each binary problem without validation.
            self.results_ = Parallel(n_jobs=self.n_jobs)(
                delayed(self._fit_binary)(X, column, None, None, **fit_params)
                for i, column in enumerate(columns))

        return self

    def _fit_binary(self, X, y, X_val, y_val, **fit_params):
        unique_y = np.unique(y)
        init_score_value = self.loss.init_score(y)
        if len(unique_y) == 1:
            # Degenerate case: only one label present, fall back to a
            # constant predictor (private scikit-learn helper).
            estimator = _ConstantPredictor().fit(X, unique_y)
        else:
            fit = lgb.Dataset(X, y,
                              init_score=np.full_like(y, init_score_value, dtype=float))
            if 'eval_set' in fit_params:
                val = lgb.Dataset(X_val, y_val,
                                  init_score=np.full_like(y_val, init_score_value, dtype=float),
                                  reference=fit)
                estimator = lgb.train(params=fit_params,
                                      train_set=fit,
                                      valid_sets=(fit, val),
                                      valid_names=('fit', 'val'),
                                      early_stopping_rounds=10,
                                      fobj=self.loss.lgb_obj,
                                      feval=self.loss.lgb_eval,
                                      verbose_eval=10)
            else:
                estimator = lgb.train(params=fit_params,
                                      train_set=fit,
                                      fobj=self.loss.lgb_obj,
                                      feval=self.loss.lgb_eval,
                                      verbose_eval=10)

        return estimator, init_score_value

    def predict(self, X):
        # Pick, for every sample, the class whose binary model gives the
        # highest probability. Returns column indices into self.classes_.
        n_samples = X.shape[0]
        maxima = np.empty(n_samples, dtype=float)
        maxima.fill(-np.inf)
        argmaxima = np.zeros(n_samples, dtype=int)
        for i, (e, init_score) in enumerate(self.results_):
            # raw_score predictions do not include the Dataset init_score,
            # so it is added back before applying the sigmoid.
            margins = e.predict(X, raw_score=True)
            prob = special.expit(margins + init_score)
            np.maximum(maxima, prob, out=maxima)
            argmaxima[maxima == prob] = i

        return argmaxima

    def predict_proba(self, X):
        # One sigmoid score per class, normalized to sum to one per row.
        y = np.zeros((X.shape[0], len(self.results_)))
        for i, (e, init_score) in enumerate(self.results_):
            margins = e.predict(X, raw_score=True)
            y[:, i] = special.expit(margins + init_score)
        y /= np.sum(y, axis=1)[:, np.newaxis]
        return y
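

# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original file: a minimal loss object
# exposing the three hooks the wrapper expects (init_score, lgb_obj,
# lgb_eval), here a plain binary logistic loss. The class name LogisticLoss
# and the __main__ demo below are assumptions for demonstration only, and
# follow the pre-4.0 LightGBM fobj/feval signature (preds, train_data).
class LogisticLoss:

    def init_score(self, y):
        # Start boosting from the log-odds of the positive-class rate.
        p = np.clip(np.mean(y), 1e-15, 1 - 1e-15)
        return np.log(p / (1 - p))

    def lgb_obj(self, preds, train_data):
        # Gradient and Hessian of the logistic loss w.r.t. the raw score.
        y = train_data.get_label()
        p = special.expit(preds)
        return p - y, p * (1 - p)

    def lgb_eval(self, preds, train_data):
        # (name, value, is_higher_better) tuple, as LightGBM expects from feval.
        y = train_data.get_label()
        p = np.clip(special.expit(preds), 1e-15, 1 - 1e-15)
        logloss = -np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))
        return 'logloss', logloss, False


if __name__ == '__main__':
    # Hypothetical end-to-end run on a small multiclass dataset,
    # assuming lightgbm < 4.0 and scikit-learn are installed.
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split

    X, y = load_iris(return_X_y=True)
    X_fit, X_val, y_fit, y_val = train_test_split(X, y, random_state=0)

    clf = OneVsRestLightGBMWithCustomizedLoss(loss=LogisticLoss())
    # fit_params is forwarded to lgb.train as params; passing 'eval_set'
    # additionally enables per-class early stopping.
    clf.fit(X_fit, y_fit, eval_set=[(X_val, y_val)])

    print(clf.predict(X_val)[:10])        # column indices into clf.classes_
    print(clf.predict_proba(X_val)[:3])   # normalized one-vs-rest scores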