-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel.py
173 lines (162 loc) · 5.67 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# recursive multi-step forecast with linear algorithms
from math import sqrt
from numpy import split
from numpy import array
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import Lars
from sklearn.linear_model import LassoLars
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import SGDRegressor
# split a univariate dataset into train/test sets
def split_dataset(data):
    """Split the observations into weekly train and test windows.

    The first row and the last six rows are discarded. The 322 rows just
    before the discarded tail become the test set; everything between the
    discarded first row and the test set becomes the training set. Both
    parts are then cut into consecutive windows of 7 rows.

    Args:
        data: array of observations, one row per time step (the original
            comments describe these as days grouped into standard weeks).

    Returns:
        A ``(train, test)`` tuple of arrays whose leading axes are
        ``(n_windows, 7)``.

    Raises:
        ValueError: if either part is empty or does not hold a whole
            number of 7-row windows.
    """
    days_per_week = 7
    # split into standard weeks
    train, test = data[1:-328], data[-328:-6]
    for label, part in (('train', train), ('test', test)):
        if len(part) == 0 or len(part) % days_per_week:
            raise ValueError(
                '%s split has %d rows; expected a positive multiple of %d'
                % (label, len(part), days_per_week)
            )
    # numpy.split expects an integer section count; true division would
    # hand it a float, so use floor division (exact after the check above)
    train = array(split(train, len(train) // days_per_week))
    test = array(split(test, len(test) // days_per_week))
    return train, test
# evaluate one or more weekly forecasts against expected values
def evaluate_forecasts(actual, predicted):
    """Score forecasts against the observed values using RMSE.

    Args:
        actual: 2-D array of observed values, indexed ``[row, column]``.
        predicted: 2-D array of forecasts with the same shape as ``actual``.

    Returns:
        A ``(score, scores)`` tuple: ``score`` is the RMSE over every
        element, ``scores`` is a list holding one RMSE per column.
        NaN inputs are not rejected; they propagate into the results.

    Raises:
        ValueError: if the inputs are not 2-D or differ in shape.
    """
    actual = array(actual, dtype=float)
    predicted = array(predicted, dtype=float)
    if actual.ndim != 2 or actual.shape != predicted.shape:
        raise ValueError(
            'actual and predicted must be 2-D arrays of equal shape, got '
            '%s and %s' % (actual.shape, predicted.shape)
        )
    # compute the squared errors once and reuse them for both summaries
    squared_errors = (actual - predicted) ** 2
    # one RMSE per column (per forecast day)
    scores = [sqrt(mse) for mse in squared_errors.mean(axis=0)]
    # overall RMSE across every row and column
    score = sqrt(squared_errors.mean())
    return score, scores
# summarize scores
def summarize_scores(name, score, scores):
    """Print one line with a model's overall score and its per-step scores.

    Args:
        name: label printed at the start of the line.
        score: overall score, printed with three decimals in brackets.
        scores: iterable of per-step scores, each printed with one decimal
            and separated by commas.
    """
    formatted = []
    for value in scores:
        formatted.append('%.1f' % value)
    line = '%s: [%.3f] %s' % (name, score, ', '.join(formatted))
    print(line)
# prepare a list of ml models
def get_models(models=None):
    """Build the dictionary of linear models to evaluate.

    Args:
        models: optional dict to add the models to. It is updated in place
            and returned. When omitted, a fresh dict is created for each
            call.

    Returns:
        The dict mapping a short name to an unfitted estimator.
    """
    # A ``dict()`` default is evaluated once at definition time and would be
    # shared (and keep growing) across calls, so create it per call instead.
    if models is None:
        models = {}
    # linear models
    models['lr'] = LinearRegression()
    models['lasso'] = Lasso()
    models['ridge'] = Ridge()
    models['en'] = ElasticNet()
    models['huber'] = HuberRegressor()
    models['lars'] = Lars()
    models['llars'] = LassoLars()
    models['pa'] = PassiveAggressiveRegressor(max_iter=1000, tol=1e-3)
    # NOTE(review): the key is spelled 'ranscac' (sic); it is kept unchanged
    # because it is part of the returned mapping and appears in the output.
    models['ranscac'] = RANSACRegressor()
    models['sgd'] = SGDRegressor(max_iter=1000, tol=1e-3)
    print('Defined %d models' % len(models))
    return models
# create a feature preparation pipeline for a model
def make_pipeline(model):
    """Wrap ``model`` in a feature-preparation pipeline.

    The pipeline applies ``StandardScaler``, then ``MinMaxScaler``, and
    finally the given model.

    Args:
        model: the estimator placed as the last pipeline step.

    Returns:
        The assembled ``Pipeline``.
    """
    return Pipeline(steps=[
        # standardization
        ('standardize', StandardScaler()),
        # normalization
        ('normalize', MinMaxScaler()),
        # the model
        ('model', model),
    ])
# make a recursive multi-step forecast
def forecast(model, input_x, n_input, n_steps=7):
    """Make a recursive multi-step forecast.

    Each step feeds the most recent ``n_input`` values (observations first,
    then earlier predictions as they accumulate) to ``model.predict`` and
    appends the one-step prediction to the working window.

    Args:
        model: fitted object whose ``predict`` accepts an array of shape
            ``(1, n_input)`` and returns an indexable result.
        input_x: iterable of the most recent values; it is copied, not
            modified.
        n_input: number of trailing values used as model input per step.
        n_steps: number of steps to forecast; defaults to 7, the horizon
            that was previously hard-coded.

    Returns:
        A list with ``n_steps`` predictions, in forecast order.
    """
    window = list(input_x)
    yhat_sequence = []
    for _ in range(n_steps):
        # prepare the input data: the latest n_input values as a single row
        X = array(window[-n_input:]).reshape(1, n_input)
        # make a one-step forecast
        yhat = model.predict(X)[0]
        yhat_sequence.append(yhat)
        # feed the prediction back so the next step can use it as input
        window.append(yhat)
    return yhat_sequence
# convert windows of weekly multivariate data into a series of total power
def to_series(data):
    """Flatten weekly windows into a single series of their first column.

    Args:
        data: iterable of 2-D arrays (one per week); column 0 of each is
            taken (the original comment calls this column total power).

    Returns:
        A 1-D array with the column-0 values of every window, in order.
    """
    first_columns = []
    for week in data:
        # extract just the first column from each week
        first_columns.append(week[:, 0])
    # stack the weeks and flatten them into a single series
    return array(first_columns).flatten()
# convert history into inputs and outputs
def to_supervised(history, n_input):
    """Turn the history into supervised-learning inputs and targets.

    The history is flattened with ``to_series``; every run of ``n_input``
    consecutive values becomes one input row and the value immediately
    after it becomes that row's target.

    Args:
        history: weekly windows accepted by ``to_series``.
        n_input: number of consecutive values per input row.

    Returns:
        An ``(X, y)`` tuple of arrays built from the collected rows and
        targets.
    """
    series = to_series(history)
    total = len(series)
    inputs, targets = [], []
    # slide the window over the series one time step at a time
    for start in range(total):
        stop = start + n_input
        if stop >= total:
            # no target value is left for this window, nor for any later one
            break
        inputs.append(series[start:stop])
        targets.append(series[stop])
    return array(inputs), array(targets)
# fit a model and make a forecast
def sklearn_predict(model, history, n_input):
    """Fit ``model`` on the history and forecast the following week.

    Args:
        model: estimator to wrap with ``make_pipeline`` and fit.
        history: weekly windows observed so far.
        n_input: number of prior values used as input for each prediction.

    Returns:
        The list of predictions produced by ``forecast``.
    """
    # prepare data
    train_x, train_y = to_supervised(history, n_input)
    # make pipeline
    pipeline = make_pipeline(model)
    # fit the model
    pipeline.fit(train_x, train_y)
    # Seed the forecast with the latest n_input observations. The last
    # training row (train_x[-1]) stops one step short of the newest value,
    # which is its target train_y[-1]; seeding with it would re-predict an
    # already known value and shift the whole forecast by one step.
    last_window = to_series(history)[-n_input:]
    # predict the week, recursively
    yhat_sequence = forecast(pipeline, last_window, n_input)
    return yhat_sequence
# evaluate a single model
def evaluate_model(model, train, test, n_input):
    """Evaluate one model with walk-forward validation over the test weeks.

    For every test week the model is refit on the history seen so far and
    asked for a forecast; the real week is then added to the history.

    Args:
        model: estimator passed to ``sklearn_predict``.
        train: weekly training windows used as the initial history.
        test: array of weekly test windows; column 0 of each window holds
            the values the forecasts are scored against.
        n_input: number of prior values used as model input.

    Returns:
        The ``(score, scores)`` pair computed by ``evaluate_forecasts``.
    """
    # history is a list of weekly data
    history = list(train)
    weekly_forecasts = []
    for week in test:
        # forecast this week from everything observed before it
        weekly_forecasts.append(sklearn_predict(model, history, n_input))
        # reveal the real observations before predicting the next week
        history.append(week)
    predictions = array(weekly_forecasts)
    # compare the forecasts with column 0 of every test week
    score, scores = evaluate_forecasts(test[:, :, 0], predictions)
    return score, scores
# load the dataset, parsing the 'datetime' column and using it as the index
# (the former ``infer_datetime_format=True`` argument is omitted: it is
# deprecated since pandas 2.0, where consistent format inference is the
# default behaviour)
dataset = read_csv(
    'household_power_consumption_days.csv',
    header=0,
    parse_dates=['datetime'],
    index_col=['datetime'],
)
# split into train and test
train, test = split_dataset(dataset.values)
# prepare the models to evaluate
models = get_models()
# number of prior observations fed to each model as input
n_input = 7
# x-axis labels for the seven forecast days
days = ['sun', 'mon', 'tue', 'wed', 'thr', 'fri', 'sat']
# evaluate each model
for name, model in models.items():
    # evaluate and get scores
    score, scores = evaluate_model(model, train, test, n_input)
    # summarize scores
    summarize_scores(name, score, scores)
    # plot this model's per-day scores as one line
    pyplot.plot(days, scores, marker='o', label=name)
# show plot
pyplot.legend()
pyplot.show()