-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathautoregression.py
executable file
·85 lines (76 loc) · 2.46 KB
/
autoregression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import numpy as np
# Load the data set; the first CSV column is parsed as dates and used as the index.
data = pd.read_csv('imputed_arimadata.csv', parse_dates=[0], index_col=0)
print(data.describe())

# Optional stationarity check (augmented Dickey-Fuller test on the differenced
# X2 column). Left disabled, exactly as in the original script.
# from statsmodels.tsa.stattools import adfuller
# series=data['X2']
# X=series.values
# result = adfuller(np.diff(X))
# print('ADF Statistic: %f' % result[0])
# print('p-value: %f' % result[1])
# print('Critical Values:')
# for key, value in result[4].items():
#     print('\t%s: %.3f' % (key, value))
# series=pd.Series(data['X2'])

# Work on the first difference of X1. np.diff yields one element fewer than its
# input, so the result is re-indexed from the second timestamp onwards.
dat = data.index[1:]
value = np.diff(data['X1'])
series = pd.Series(value, index=dat)

# `pandas.tools.plotting` was deprecated in pandas 0.20 and is absent from
# current releases; prefer `pandas.plotting` and fall back only on old installs.
try:
    from pandas.plotting import autocorrelation_plot
except ImportError:
    from pandas.tools.plotting import autocorrelation_plot
autocorrelation_plot(series)
plt.show()

# Autocorrelation and partial autocorrelation of the differenced series, 25 lags.
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
plot_acf(series, lags=25)
plot_pacf(series, lags=25)
plt.show()
## PERSISTENCE MODEL ###########################################
# Build the lagged data set: column 't-1' is the series shifted down by one
# step, column 't+1' is the series itself.
values = pd.DataFrame(series.values)
lagged = values.shift(1)
dataframe = pd.concat([lagged, values], axis=1)
dataframe.columns = ['t-1', 't+1']

# The last n_test rows form the test set. The training slice starts at row 1
# because shift(1) leaves a NaN in the 't-1' column of row 0.
n_test = 89
X = dataframe.values
split_at = len(X) - n_test
train = X[1:split_at]
test = X[split_at:]

# Column 0 is the input (previous value), column 1 is the target.
train_X = train[:, 0]
train_y = train[:, 1]
test_X = test[:, 0]
test_y = test[:, 1]
# persistence model
def model_persistence(x):
    """Return the persistence (naive) forecast for the next step.

    The forecast is the input itself: the previous observation is passed
    through unchanged.
    """
    return x
# walk-forward validation: forecast every test step from its lagged value
predictions = [model_persistence(x) for x in test_X]

# score the forecasts against the observed values
test_score = mean_squared_error(test_y, predictions)
print('Test MSE: %.3f' % test_score)

# plot predictions (red) vs expected (default colour)
plt.plot(test_y)
plt.plot(predictions, color='red')
plt.show()
################################################################
## AUTO_REGRESSION MODEL #######################################
# `AR` was deprecated in statsmodels 0.11 and later removed; `AutoReg` is its
# replacement. `AR` is used only on installations that predate `AutoReg`.
try:
    from statsmodels.tsa.ar_model import AutoReg
except ImportError:
    AutoReg = None
    from statsmodels.tsa.ar_model import AR

# split dataset: the last n_test observations are held out for testing
# NOTE(review): X contains no NaN here, so starting the training slice at
# index 1 drops one valid observation; kept so the scores recorded below stay
# comparable -- confirm whether this is intended.
X = series.values
train, test = X[1:len(X)-n_test], X[len(X)-n_test:]

# train autoregression
if AutoReg is not None:
    # AutoReg requires an explicit lag order; use the rule AR.fit() applied
    # when no maxlag was given: round(12 * (nobs / 100) ** (1 / 4)).
    n_lags = int(round(12 * (len(train) / 100.0) ** 0.25))
    model = AutoReg(train, lags=n_lags)
    model_fit = model.fit()
else:
    model = AR(train)
    model_fit = model.fit()
    n_lags = model_fit.k_ar
print('Lag: %s' % n_lags)
print('Coefficients: %s' % model_fit.params)

# make out-of-sample predictions covering exactly the test window
predictions = model_fit.predict(start=len(train), end=len(train)+len(test)-1, dynamic=False)
for predicted, expected in zip(predictions, test):
    print('predicted=%f, expected=%f' % (predicted, expected))
error = mean_squared_error(test, predictions)
print('Test MSE: %.3f' % error)  # previously recorded: 4789.437

# plot results: expected values vs predictions (red)
plt.plot(test)
plt.plot(predictions, color='red')
plt.show()

# Recorded results from earlier runs (presumably test MSE, persistence model
# -> autoregression model; verify before relying on them):
# X1- 12,263 to 4840
# X2-35797 to 70801
###############################################################