-
Notifications
You must be signed in to change notification settings - Fork 0
/
trial.py
115 lines (98 loc) · 4.3 KB
/
trial.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import time
from sklearn.metrics.scorer import log_loss as error_func
from sklearn.linear_model.logistic import LogisticRegression as MetaEstimator
from sklearn.model_selection import cross_val_predict, KFold
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMRegressor, LGBMClassifier
from catboost import CatBoostClassifier, CatBoostRegressor
from xgboost import XGBRegressor, XGBClassifier
from base import tuning
from config import get_setting
from data_loader import *
# from config import log
class Ensemble:
    """Two-level ensemble: a meta-estimator fitted on base-estimator probabilities.

    ``fit`` collects cross-validated ``predict_proba`` output from every base
    estimator, keeps the first probability column of each, and trains the
    meta-estimator (``MetaEstimator``) on those columns. The base estimators
    are then refitted on the full training data so they can feed the
    meta-estimator at prediction time.

    Parameters
    ----------
    base_estimators : sequence of estimators exposing ``fit`` / ``predict_proba``
        Must be non-empty by the time ``fit`` or ``predict_proba`` is called.
    random_state : int
        Seed passed to the shuffling ``KFold`` used inside ``fit``.
    cv : int
        Number of folds used inside ``fit``.

    NOTE(review): ``np``, ``log`` and ``check_result`` are expected to be
    available at module level (``np``/``log`` presumably via
    ``from data_loader import *``) - confirm.
    """

    def __init__(self, base_estimators=None, random_state=0, cv=3):
        self.base_estimators = base_estimators
        self.estimator = MetaEstimator()
        self.random_state = random_state
        self.fit_cv = cv

    def _checked_base_estimators(self):
        """Return the configured base estimators, or raise if there are none."""
        estimators = self.base_estimators
        if estimators is None or len(estimators) == 0:
            raise ValueError('Ensemble requires at least one base estimator')
        return estimators

    def fit(self, X, y):
        """Fit the meta-estimator on cross-validated base predictions, then refit the bases.

        Returns ``self`` so calls can be chained.

        Raises
        ------
        ValueError
            If no base estimators were configured.
        """
        # Validate first so a misconfigured ensemble fails before any work is done.
        base_estimators = self._checked_base_estimators()
        cv = KFold(n_splits=self.fit_cv, shuffle=True, random_state=self.random_state)

        columns = []
        for estimator in base_estimators:
            name = estimator.__class__.__name__
            log(0x25, 'cross_val_predict start', name)
            prediction = cross_val_predict(estimator, X, y, cv=cv, method='predict_proba')
            log(0x25, 'cross_val_predict end', name)
            log(0x25, 'CV Score', name, check_result(y, prediction))
            # Only the first probability column is used as a meta-feature.
            columns.append(prediction[:, 0])

        # One column per base estimator, one row per sample.
        self.estimator.fit(np.column_stack(columns), y)

        # Refit every base estimator on all of the data for use in predict_proba.
        for estimator in base_estimators:
            name = estimator.__class__.__name__
            log(0x25, 'fit start', name)
            estimator.fit(X, y)
            log(0x25, 'fit end:', name)
        return self

    def predict(self, X, margin=0.5):
        """Return a boolean array: first-column meta probability > ``margin``.

        ``margin`` defaults to 0.5 so the method can be called as ``predict(X)``.
        """
        return np.array(self.predict_proba(X)[:, 0]) > margin

    def predict_proba(self, X):
        """Return the meta-estimator's ``predict_proba`` over the base estimators' outputs.

        Raises
        ------
        ValueError
            If no base estimators were configured.
        """
        columns = [
            estimator.predict_proba(X)[:, 0]
            for estimator in self._checked_base_estimators()
        ]
        return self.estimator.predict_proba(np.column_stack(columns))
def check_result(y_true, y_pred):
    """Score ``y_pred`` against ``y_true`` with the module-level ``error_func``.

    ``error_func`` is whatever the file imports under that alias; the value it
    returns is passed back unchanged.
    """
    score = error_func(y_true, y_pred)
    return score
def check(estimator, data, tune=True, fit=True):
    """Log timing and test score for ``estimator``, optionally after tuning.

    Parameters
    ----------
    estimator : object with ``fit`` / ``predict_proba`` (and ``get_params`` when tuning)
    data : mapping
        ``data['train']`` is unpacked as keyword arguments into ``fit`` and
        ``tuning``; ``data['test']['X']`` and ``data['test']['y']`` are used
        for scoring.
    tune : bool
        When true, run ``tuning`` afterwards and log a second score.
    fit : bool
        When true, fit the estimator on the training data first.

    Returns ``None``; all results are reported through ``log``.

    NOTE(review): the Ensemble-specific logging is placed outside the
    ``if fit`` branch - confirm this matches the intended nesting.
    """
    name = estimator.__class__.__name__

    log(0x25, '~Default Setting~', name)
    if fit:
        started = time.time()
        estimator.fit(**data['train'])
        log(0x25, 'Fit in:', time.time() - started)

    if name == 'Ensemble':
        base_names = ', '.join(base.__class__.__name__ for base in estimator.base_estimators)
        log(0x25, 'Base Estimators:', base_names)
        meta = estimator.estimator
        log(0x25, 'Ceof:', meta.coef_, 'intercept:', meta.intercept_)

    # Baseline score with the estimator as given.
    started = time.time()
    prediction = estimator.predict_proba(data['test']['X'])
    log(0x25, 'Predict in:', time.time() - started)
    log(0x25, 'Score:', check_result(data['test']['y'], prediction))

    if tune:
        log(0x25, '~Tuned~', name)
        started = time.time()
        tuning(estimator, **data['train'], **get_setting(name))
        # The reported time covers both tuning and the prediction below.
        tuned_score = check_result(data['test']['y'], estimator.predict_proba(data['test']['X']))
        log(0x25, 'Params:', estimator.get_params())
        log(0x25, 'Time:', time.time() - started)
        log(0x25, 'Score:', tuned_score)
if __name__ == '__main__':
    # Script entry point: score the stacked ensemble, then each base estimator
    # on its own without refitting.
    # Alternative datasets: load_adult(), load_Amazon().
    data = load_orange()

    random_state = 0
    cv = 5
    log(0x24, 'random_state:', random_state, 'cv:', cv)

    # GradientBoostingClassifier and XGBClassifier are imported but currently
    # left out of the stack.
    base_estimators = [
        RandomForestClassifier(random_state=random_state),
        LGBMClassifier(seed=random_state),
        CatBoostClassifier(random_seed=random_state),
        LogisticRegression(random_state=random_state),
    ]
    ensemble = Ensemble(base_estimators=base_estimators, random_state=random_state, cv=cv)

    check(ensemble, data, tune=False)
    # Ensemble.fit has already fitted the base estimators, so only score them here.
    for base in ensemble.base_estimators:
        check(base, data, tune=False, fit=False)