-
Notifications
You must be signed in to change notification settings - Fork 0
/
ml_lightgbm.py
executable file
·101 lines (79 loc) · 3.35 KB
/
ml_lightgbm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import math
import numpy as np
import pandas as pd
import lightgbm as lgb
import matplotlib.pyplot as plt
from sklearn import metrics
import logging
class lightgbm_regression:
    """Sliding-window LightGBM regressor for a single numeric series.

    The series is turned into supervised samples (a window of consecutive
    points as features, the point right after the window as target), the
    last part of the samples is held out as a test set, a LightGBM model is
    trained on the rest, and the held-out predictions are plotted and scored.

    Attributes set by ``train_data_set``: ``model``, ``y_train``, ``y_test``,
    ``predict``. Attributes set by ``calculate_error``: ``mape``, ``mae``,
    ``rmse``.
    """

    def __init__(self, cdn_name, data_series, model_params, learner):
        """Store the configuration; no work is done here.

        Args:
            cdn_name: Label used in the plot title and the output file name.
            data_series: Sequence of numeric observations, oldest first.
            model_params: Mapping that must provide
                ``"feature_points_length"`` (window size) and
                ``"test_size_ration"`` (fraction of samples held out).
            learner: Label appended to the output file name.
        """
        logging.info("initing object lightgbm regression...")
        self.cdn_name = cdn_name
        self.data_series = data_series
        self.model_params = model_params
        self.learner = learner

    def construct_data_set(self):
        """Build the supervised (X, y) arrays from the raw series.

        Row ``t`` of ``X`` holds ``series[t] .. series[t + k - 1]`` and
        ``y[t]`` is ``series[t + k]``, where ``k`` is
        ``model_params["feature_points_length"]``.

        Returns:
            Tuple ``(X, y)`` of NumPy arrays; rows that would need values
            outside the series (or that contain NaN) are dropped.
        """
        logging.info("construct to train data set.")
        data_df = pd.DataFrame(self.data_series, columns=["x_0"])
        feature_points = self.model_params["feature_points_length"]
        # A negative shift pulls later observations up into the current row,
        # so columns x_0..x_{k-1} form a forward-looking window.
        for i in range(1, feature_points):
            data_df["x_%s" % (i)] = data_df["x_0"].shift(-i)
        # The target must be the observation right AFTER the window; a
        # positive shift here would pair the window with a value that
        # precedes it, i.e. train the model to predict the past.
        data_df["y"] = data_df["x_0"].shift(-feature_points)
        data_df.dropna(inplace=True)
        values = data_df.values
        X = values[:, :-1]
        y = values[:, -1]
        return X, y

    def _split_train_test(self, X, y):
        """Split samples chronologically into train and test parts.

        Returns:
            ``(X_train, X_test, y_train, y_test)`` where the test part is the
            trailing ``int(n * test_size_ration)`` samples.

        Raises:
            ValueError: If the split would leave the train or test set empty.
        """
        n_samples = X.shape[0]
        test_size = int(n_samples * self.model_params["test_size_ration"])
        # Guard explicitly: slicing with ``-0`` would silently swap the two
        # sets (empty train, everything in test).
        if not 0 < test_size < n_samples:
            raise ValueError(
                "test_size_ration yields %d test samples out of %d; both the "
                "train and the test set must be non-empty"
                % (test_size, n_samples)
            )
        split_at = n_samples - test_size
        return X[:split_at], X[split_at:], y[:split_at], y[split_at:]

    def train_data_set(self):
        """Train the LightGBM model and predict on the held-out samples.

        Stores the trained booster in ``self.model``, the targets in
        ``self.y_train`` / ``self.y_test`` and the test predictions in
        ``self.predict``.

        Raises:
            ValueError: If the train/test split would be degenerate.
        """
        logging.info("train data set")
        X, y = self.construct_data_set()
        X_train, X_test, y_train, y_test = self._split_train_test(X, y)
        d_train = lgb.Dataset(X_train, label=y_train)
        # The number of boosting rounds is given once, via num_boost_round,
        # instead of also appearing in params under the n_estimators alias.
        params = {
            "learning_rate": 0.03,
            "boosting_type": "gbdt",
            "objective": "regression",
            "max_depth": 10,
        }
        booster = lgb.train(params, d_train, num_boost_round=100)
        self.model = booster
        self.y_train = y_train
        self.y_test = y_test
        self.predict = booster.predict(X_test)

    def plot_train_result(self):
        """Plot the tail of the training targets, the test targets and the
        predictions on one timeline and save the figure to
        ``<cdn_name>_<learner>``.

        Requires ``train_data_set`` to have been run.
        """
        logging.info("ploting train result")
        test_len = self.y_test.shape[0]
        # Show as many training points as there are test points, or all of
        # them when the training set is shorter than the test set.
        tail_len = min(test_len, len(self.y_train))
        train_tail = self.y_train[len(self.y_train) - tail_len:]
        train_index = np.arange(test_len - tail_len, test_len)
        test_index = np.arange(test_len, 2 * test_len)
        df_train = pd.DataFrame(train_tail, index=train_index, columns=["train"])
        df_test = pd.DataFrame(self.y_test, index=test_index, columns=["test"])
        df_predict = pd.DataFrame(
            self.predict, index=test_index, columns=["predict"]
        )
        # Row-wise concat keeps one column per curve; the gaps are NaN.
        df_result = pd.concat([df_train, df_test, df_predict], axis=0)
        ax = df_result.plot()
        ax.set_title("%s_prediction" % (self.cdn_name))
        ax.set_xlabel("timeline")
        ax.set_ylabel("traffic")
        ax.grid()
        fig = ax.get_figure()
        try:
            fig.savefig(self.cdn_name + "_" + self.learner)
        finally:
            # Release the figure so repeated runs do not accumulate open
            # figures in pyplot's global state.
            plt.close(fig)

    def calculate_error(self):
        """Compute and print MAPE, MAE and RMSE of the test predictions.

        MAPE is averaged over the samples whose actual value is non-zero
        (the percentage error is undefined at zero); it is NaN when every
        actual value is zero. Results are stored in ``self.mape``,
        ``self.mae`` and ``self.rmse``.

        Requires ``train_data_set`` to have been run.
        """
        logging.info("calculating model prediction error")
        actual = np.asarray(self.y_test, dtype=float)
        predicted = np.asarray(self.predict, dtype=float)
        nonzero = actual != 0
        if nonzero.any():
            relative_error = (actual[nonzero] - predicted[nonzero]) / actual[nonzero]
            self.mape = np.mean(np.abs(relative_error)) * 100
        else:
            self.mape = float("nan")
        self.mae = metrics.mean_absolute_error(self.y_test, self.predict)
        self.rmse = math.sqrt(
            metrics.mean_squared_error(self.y_test, self.predict)
        )
        print("MAPE : %s\nMAE : %s\nRMSE : %s" % (self.mape, self.mae, self.rmse))

    def model_run(self):
        """Run the full pipeline: train, plot, then score."""
        self.train_data_set()
        self.plot_train_result()
        self.calculate_error()