# TNX10_LSTM.py
# Trains a small LSTM on the '10 Yr' column of the combined daily treasury
# rates CSV and plots a short multi-step forecast of that series.
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
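# PREPROCESSING (one-off, kept commented out for reference):
# the disabled lines below merge the per-year CSV files, each sorted by Date,
# into 'full-daily-treasury-rates-20182023.csv', which is the file loaded
# further down in this script.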
# df = pd.read_csv('daily-treasury-rates-2018.csv', parse_dates=['Date'])
# #sort the dataset based on Date in ascending order
# df = df.sort_values('Date')
# print(f'There are {df.shape[0]} days in 2018')
#
# #read other files, sort the dates, then stack it with the old df
# for f in ['daily-treasury-rates-2019.csv', 'daily-treasury-rates-2020.csv',
# 'daily-treasury-rates-2021.csv', 'daily-treasury-rates-2022.csv', 'daily-treasury-rates-2023_Aug.csv']:
# new_df = pd.read_csv(f, parse_dates=['Date']).sort_values('Date')
# print(f"There are {new_df.shape[0]} days in {re.findall('[0-9]{4}', f)}")
# df = pd.concat([df, new_df], ignore_index=True)
# df.to_csv('full-daily-treasury-rates-20182023.csv', index=False)
# print(df.shape)
def _make_windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    """Turn a 1-D series into shuffled, batched (window, next value) pairs.

    Every run of ``window_size + 1`` consecutive values (stride 1, incomplete
    trailing runs dropped) becomes one example: the first ``window_size``
    values are the features and the final value is the label.
    """
    span = window_size + 1
    windows = tf.data.Dataset.from_tensor_slices(series).window(
        span, shift=1, drop_remainder=True
    )
    # window() yields nested datasets; batching each one by its full length
    # collapses it into a single tensor of `span` values.
    flattened = windows.flat_map(lambda w: w.batch(span))
    examples = flattened.map(lambda w: (w[:-1], w[-1]))
    return examples.shuffle(shuffle_buffer).batch(batch_size).prefetch(1)


# Load the combined daily rates and keep only the 10-year maturity column.
df = pd.read_csv('full-daily-treasury-rates-20182023.csv', parse_dates=['Date'])
y10 = df['10 Yr']

# Optional look at the raw series (disabled):
# plt.plot(df['Date'], y10)
# plt.title('Yield curve rates for 10-year maturity bond\n from 2018 to Aug 2023')
# plt.ylabel('%')
# plt.show()

WINDOW_SIZE = 10  # number of past observations fed to the model
BATCH_SIZE = 32   # examples per training batch

# Training set: batches of (WINDOW_SIZE past values, value that follows them).
x = _make_windowed_dataset(y10, WINDOW_SIZE, BATCH_SIZE, shuffle_buffer=1000)
# MODEL BUILDING AND TRAINING
# The dataset yields windows with WINDOW_SIZE values each; the Lambda layer
# appends a trailing axis so the LSTM receives one feature per time step.
model = tf.keras.models.Sequential([
    tf.keras.layers.Lambda(
        lambda dat: tf.expand_dims(dat, axis=-1),  # one feature only
        input_shape=[WINDOW_SIZE],
    ),
    tf.keras.layers.LSTM(WINDOW_SIZE),  # unit count reuses WINDOW_SIZE
    tf.keras.layers.Dense(1),           # single regression output
])
model.compile(loss='mae', optimizer='SGD')

# `workers` / `use_multiprocessing` are intentionally not passed: they only
# apply to generator or keras.utils.Sequence inputs (x is a tf.data.Dataset)
# and are rejected by newer Keras releases.
history = model.fit(x, epochs=100, verbose=2)

# Plot the loss curve; history.history['loss'] holds one value per epoch.
plt.plot(history.history['loss'])
plt.xlabel('epochs')
plt.ylabel('Mean Absolute Error')
plt.show()
# FORECASTING
# Autoregressive multi-step forecast: each prediction is appended to the
# rolling buffer and becomes part of the input window for the next step.
N_AHEAD = 8  # number of future steps to predict

# Seed the buffer with the most recent WINDOW_SIZE observations so it always
# matches the input length the model was built with.
new_data = y10.iloc[-WINDOW_SIZE:].to_numpy()
preds = []
for step in range(N_AHEAD):
    # Latest WINDOW_SIZE values, with a leading batch axis of size 1.
    window = new_data[step:step + WINDOW_SIZE][np.newaxis, :]
    next_value = model.predict(window)[0][0]
    new_data = np.append(new_data, next_value)
    preds.append(next_value)
print(preds)

# Plot the predictions after the observed series.
plt.plot(df['Date'], y10, label='train data')
# Forecast dates start on the business day after the last observation and are
# Timestamps, like df['Date'], so both lines share one date axis.
# NOTE(review): assumes the series has one row per business day - confirm.
first_forecast_day = df['Date'].iloc[-1] + pd.offsets.BDay(1)
xaxis_preds = pd.date_range(start=first_forecast_day, periods=N_AHEAD, freq='B')
plt.plot(xaxis_preds, preds, color='hotpink', label='predictions')
plt.legend()
plt.show()