# ----------------------------------------------------
# Time-Series Autoencoder using Tensorflow 1.0.3
# Created by: Jonathan Zia
# Last Modified: Friday, March 9, 2018
# Georgia Institute of Technology
# ----------------------------------------------------
import tensorflow as tf
import network as net
import pandas as pd
import random as rd
import numpy as np
import time
import math
import csv
import os
# ----------------------------------------------------
# User-Defined Constants
# ----------------------------------------------------
# Training
NUM_TRAINING = 500 # Number of training batches (balanced minibatches)
NUM_VALIDATION = 500 # Number of validation batches (balanced minibatches)
# Learning rate decay
# Decay type can be 'none', 'exp', 'inv_time', or 'nat_exp'
DECAY_TYPE = 'none' # Set decay type for learning rate
LEARNING_RATE_INIT = 0.001 # Set initial learning rate for optimizer (default 0.001) (fixed LR for 'none')
LEARNING_RATE_END = 0.00001 # Set ending learning rate for optimizer
# Load File
LOAD_FILE = False # Load initial LSTM model from saved checkpoint?
# ----------------------------------------------------
# Instantiate Network Classes
# ----------------------------------------------------
lstm_encoder = net.EncoderNetwork()
lstm_decoder = net.DecoderNetwork(batch_size=lstm_encoder.batch_size, num_steps=lstm_encoder.num_steps,
                                  input_features=lstm_encoder.latent + lstm_encoder.input_features)
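# Note: the decoder is declared with input width latent + input_features because each
# decoder timestep after the first takes the previous step's output concatenated with
# the latent vector as its input (see the Decoder_RNN loop below).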
# ----------------------------------------------------
# Input data files
# ----------------------------------------------------
# Specify filenames
# Root directory:
dir_name = "/Users/username"
with tf.name_scope("Training_Data"): # Training dataset
tDataset = os.path.join(dir_name, "data/dataset.csv")
with tf.name_scope("Validation_Data"): # Validation dataset
vDataset = os.path.join(dir_name, "data/dataset.csv")
with tf.name_scope("Model_Data"): # Model save/load paths
load_path = os.path.join(dir_name, "checkpoints/model") # Load previous model
save_path = os.path.join(dir_name, "checkpoints/model") # Save model at each step
save_path_op = os.path.join(dir_name, "checkpoints/model_op") # Save optimal model
with tf.name_scope("Filewriter_Data"): # Filewriter save path
filewriter_path = os.path.join(dir_name, "output")
with tf.name_scope("Output_Data"): # Output data filenames (.txt)
# These .txt files will contain loss data for Matlab analysis
training_loss = os.path.join(dir_name, "training_loss.txt")
validation_loss = os.path.join(dir_name, "validation_loss.txt")
# Obtain length of training and validation datasets
file_length = len(pd.read_csv(tDataset))
v_file_length = len(pd.read_csv(vDataset))
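# These row counts bound the random row offset chosen in extract_data() so that each
# minibatch window of num_steps consecutive rows stays inside the file.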
# ----------------------------------------------------
# User-Defined Methods
# ----------------------------------------------------
def init_values(shape):
    """
    Initialize Weight and Bias Matrices
    Returns: Tensor of shape "shape" w/ normally-distributed values
    """
    temp = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(temp)
def extract_data(filename, batch_size, num_steps, input_features, f_length):
    """
    Extract features and labels from filename.csv in random batches
    Returns:
        feature_batch ~ [batch_size, num_steps, input_features]
        label_batch := feature_batch
    """
    # Initialize numpy arrays for return value placeholders
    feature_batch = np.zeros((batch_size, num_steps, input_features))
    label_batch = np.zeros((batch_size, num_steps, input_features))
    # Import data from CSV as a random minibatch:
    for i in range(batch_size):
        # Generate random index for number of rows to skip
        temp_index = rd.randint(0, f_length - num_steps - 1)
        # Read data from CSV and write as matrix
        temp = pd.read_csv(filename, skiprows=temp_index, nrows=num_steps, header=None)
        temp = temp.values  # (equivalent to the deprecated temp.as_matrix())
        # Return features in specified columns
        feature_batch[i, :, :] = temp[:, 1:input_features + 1]
        # Setting features as labels for autoencoding
        label_batch[i, :, :] = feature_batch[i, :, :]
    # Return feature and label batches
    return feature_batch, label_batch
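# Hypothetical example (the actual values live in network.py's EncoderNetwork):
# with batch_size = 64, num_steps = 5, and input_features = 9,
# extract_data(tDataset, 64, 5, 9, file_length) returns two arrays of shape
# (64, 5, 9), where label_batch is a copy of feature_batch, since the autoencoder
# is trained to reconstruct its own input.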
def set_decay_rate(decay_type, learning_rate_init, learning_rate_end, num_training):
    """
    Calculate decay rate for specified decay type
    Returns: Scalar decay rate
    """
    if decay_type == 'none':
        return 0
    elif decay_type == 'exp':
        return math.pow((learning_rate_end / learning_rate_init), (1 / num_training))
    elif decay_type == 'inv_time':
        return ((learning_rate_init / learning_rate_end) - 1) / num_training
    elif decay_type == 'nat_exp':
        return (-1 / num_training) * math.log(learning_rate_end / learning_rate_init)
    else:
        return 0
def decayed_rate(decay_type, decay_rate, learning_rate_init, step):
    """
    Calculate decayed learning rate for specified parameters
    Returns: Scalar decayed learning rate
    """
    if decay_type == 'none':
        return learning_rate_init
    elif decay_type == 'exp':
        return learning_rate_init * math.pow(decay_rate, step)
    elif decay_type == 'inv_time':
        return learning_rate_init / (1 + decay_rate * step)
    elif decay_type == 'nat_exp':
        return learning_rate_init * math.exp(-decay_rate * step)
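# Worked example using the constants above: with LEARNING_RATE_INIT = 0.001,
# LEARNING_RATE_END = 0.00001, and NUM_TRAINING = 500, 'exp' decay gives
# decay_rate = (1e-5 / 1e-3) ** (1 / 500) ~ 0.99083, so the learning rate falls
# from 0.001 at step 0 to roughly 1e-5 at step 500; the 'inv_time' and 'nat_exp'
# formulas are calibrated to reach the same endpoint.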
# ----------------------------------------------------
# Importing Session Parameters
# ----------------------------------------------------
# Create placeholders for inputs and target values
# Input dimensions: BATCH_SIZE x NUM_STEPS x INPUT_FEATURES
# Target dimensions: BATCH_SIZE x NUM_STEPS x INPUT_FEATURES
inputs = tf.placeholder(tf.float32, [lstm_encoder.batch_size, lstm_encoder.num_steps, lstm_encoder.input_features], name="Input_Placeholder")
targets = tf.placeholder(tf.float32, [lstm_encoder.batch_size, lstm_encoder.num_steps, lstm_encoder.input_features], name="Target_Placeholder")
# Create placeholder for learning rate
learning_rate = tf.placeholder(tf.float32, name="Learning_Rate_Placeholder")
# ----------------------------------------------------
# Building an LSTM Encoder
# ----------------------------------------------------
# Build LSTM cell
# Creating basic LSTM cell
encoder_cell = tf.contrib.rnn.BasicLSTMCell(lstm_encoder.num_lstm_hidden,name='Encoder_Cell')
# Adding dropout wrapper to cell
encoder_cell = tf.nn.rnn_cell.DropoutWrapper(encoder_cell, input_keep_prob=lstm_encoder.i_keep_prob, output_keep_prob=lstm_encoder.o_keep_prob)
# Initialize weights and biases for latent layer.
with tf.name_scope("Encoder_Variables"):
W_latent = init_values([lstm_encoder.num_lstm_hidden, lstm_encoder.latent])
tf.summary.histogram('Weights',W_latent)
b_latent = init_values([lstm_encoder.latent])
tf.summary.histogram('Biases',b_latent)
# Add LSTM cells to dynamic_rnn and implement truncated BPTT
initial_state_encoder = state_encoder = encoder_cell.zero_state(lstm_encoder.batch_size, tf.float32)
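# The loop below feeds the input one timestep at a time, carrying the LSTM state
# forward between calls; after the loop, `output` holds the hidden output of the
# final timestep, which is then projected down to the latent space.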
with tf.variable_scope("Encoder_RNN"):
for i in range(lstm_encoder.num_steps):
# Obtain output at each step
output, state_encoder = tf.nn.dynamic_rnn(encoder_cell, inputs[:,i:i+1,:], initial_state=state_encoder)
# Obtain final output and convert to logit
# Reshape output to remove extra dimension
output = tf.reshape(output,[lstm_encoder.batch_size,lstm_encoder.num_lstm_hidden])
with tf.name_scope("Encoder_Output"):
# Obtain logits by performing (weights)*(output)+(biases)
logit = tf.matmul(output, W_latent) + b_latent
# Convert logits to tensor
latent_layer = tf.convert_to_tensor(logit)
# Converting to dimensions [batch_size, 1 (num_steps), latent]
latent_layer = tf.expand_dims(latent_layer,1,name='latent_layer')
# ----------------------------------------------------
# Building an LSTM Decoder
# ----------------------------------------------------
# Build LSTM cell
# Creating basic LSTM cells
# decoder_cell_1 is the first cell in the decoder layer, accepting latent_layer as an input
# decoder_cell_2 is each subsequent cell in the decoder layer, accepting the output at (t-1)
# as the input and the hidden state at (t-1) as the initial state.
decoder_cell_1 = tf.contrib.rnn.BasicLSTMCell(lstm_decoder.num_lstm_hidden,name='Decoder_Cell_1')
decoder_cell_2 = tf.contrib.rnn.BasicLSTMCell(lstm_decoder.num_lstm_hidden,name='Decoder_Cell_2')
# Adding dropout wrapper to each cell
decoder_cell_1 = tf.nn.rnn_cell.DropoutWrapper(decoder_cell_1, input_keep_prob=lstm_decoder.i_keep_prob, output_keep_prob=lstm_decoder.o_keep_prob)
decoder_cell_2 = tf.nn.rnn_cell.DropoutWrapper(decoder_cell_2, input_keep_prob=lstm_decoder.i_keep_prob, output_keep_prob=lstm_decoder.o_keep_prob)
# Initialize weights and biases for output layer.
with tf.name_scope("Decoder_Variables"):
W_output = init_values([lstm_decoder.num_lstm_hidden, lstm_encoder.input_features])
tf.summary.histogram('Weights',W_output)
b_output = init_values([lstm_encoder.input_features])
tf.summary.histogram('Biases',b_output)
# Initialize the initial state for the first LSTM cell
initial_state_decoder = state_decoder = decoder_cell_1.zero_state(lstm_decoder.batch_size, tf.float32)
# Initialize placeholder for outputs of the decoder layer at each timestep
logits = []
with tf.variable_scope("Decoder_RNN"):
for i in range(lstm_decoder.num_steps):
# Obtain output at each step
# For the first timestep...
if i == 0:
# Input the latent layer and obtain the output and hidden state
output, state_decoder = tf.nn.dynamic_rnn(decoder_cell_1, latent_layer, initial_state=state_decoder)
else: # For all subsequent timesteps...
# Combine the output layer at (t-1) with the latent layer; then input to obtain output at (t) and the hidden state
input_vector = tf.concat([tf.expand_dims(output,1),latent_layer],axis=2,name='Decoder_Input')
output, state_decoder = tf.nn.dynamic_rnn(decoder_cell_2, input_vector, initial_state=state_decoder)
# Obtain output and convert to logit
# Reshape output to remove extra dimension
output = tf.reshape(output,[lstm_decoder.batch_size,lstm_decoder.num_lstm_hidden])
with tf.name_scope("Decoder_Output"):
# Obtain logits by applying operation (weights)*(outputs)+(biases)
logit = tf.matmul(output, W_output) + b_output
# Append output at each timestep
logits.append(logit)
# Convert logits to tensor entitled "predictions"
predictions = tf.convert_to_tensor(logits)
# Converting to dimensions [batch_size, num_steps, input_features]
predictions = tf.transpose(logits, perm=[1, 0, 2], name='Predictions')
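# Shape note: `logits` is a list of num_steps tensors, each [batch_size, input_features];
# stacking yields [num_steps, batch_size, input_features], and the transpose above reorders
# the dimensions to [batch_size, num_steps, input_features] to match the targets placeholder.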
# ----------------------------------------------------
# Calculate Loss and Define Optimizer
# ----------------------------------------------------
# Calculating mean squared error of labels and logits
loss = tf.losses.mean_squared_error(labels=targets, predictions=predictions)
loss = tf.reduce_mean(loss)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
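# The learning rate is supplied through a placeholder so that the manually computed
# decay schedule (decayed_rate above) can be fed to the optimizer at every training step.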
# ----------------------------------------------------
# Run Session
# ----------------------------------------------------
init = tf.global_variables_initializer()
saver = tf.train.Saver() # Instantiate Saver class
t_loss = [] # Placeholder for training loss values
v_loss = [] # Placeholder for validation loss values
with tf.Session() as sess:

    # Create Tensorboard graph
    writer = tf.summary.FileWriter(filewriter_path, sess.graph)
    merged = tf.summary.merge_all()

    # If there is a model checkpoint saved, load the checkpoint. Else, initialize variables.
    if LOAD_FILE:
        # Restore saved session
        saver.restore(sess, load_path)
    else:
        # Initialize the variables
        sess.run(init)

    # Training the network
    # Setting step ranges
    step_range = NUM_TRAINING      # Set step range for training
    v_step_range = NUM_VALIDATION  # Set step range for validation

    # Obtain start time
    start_time = time.time()
    # Initialize optimal loss
    loss_op = 0

    # Determine learning rate decay
    decay_rate = set_decay_rate(DECAY_TYPE, LEARNING_RATE_INIT, LEARNING_RATE_END, NUM_TRAINING)
    if DECAY_TYPE != 'none':
        print('\nLearning Decay Rate = ', decay_rate)
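    # Each iteration below draws a fresh random minibatch, runs one optimizer step,
    # and every 50 steps evaluates the network on the validation set, checkpointing
    # the model whenever the validation loss improves on the best seen so far.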
    # Set number of trials to NUM_TRAINING
    for step in range(0, step_range):

        # Initialize optimal model saver to False
        save_op = False

        try:  # While there is no out-of-bounds exception...
            # Obtaining batch of features and labels from TRAINING dataset(s)
            features, labels = extract_data(tDataset, lstm_encoder.batch_size, lstm_encoder.num_steps, lstm_encoder.input_features, file_length)
        except:
            break

        # Set optional conditional for network training
        if True:

            # Print step
            print("\nOptimizing at step", step)

            # Calculate time-decayed learning rate
            decayed_learning_rate = decayed_rate(DECAY_TYPE, decay_rate, LEARNING_RATE_INIT, step)

            # Input data and learning rate
            feed_dict = {inputs: features, targets: labels, learning_rate: decayed_learning_rate}

            # Run prediction, optimizer, and loss ops in graph
            predictions_, targets_, _, loss_ = sess.run([predictions, targets, optimizer, loss], feed_dict=feed_dict)

            # Record loss
            t_loss.append(loss_)
            # Evaluate network and print data in terminal periodically
            with tf.name_scope("Validation"):

                # Conditional statement for validation and printing
                if step % 50 == 0:
                    print("\nMinibatch train loss at step", step, ":", loss_)

                    # Evaluate network
                    test_loss = []
                    for step_num in range(0, v_step_range):

                        try:  # While there is no out-of-bounds exception...
                            # Obtaining batch of features and labels from VALIDATION dataset(s)
                            v_features, v_labels = extract_data(vDataset, lstm_encoder.batch_size, lstm_encoder.num_steps, lstm_encoder.input_features, v_file_length)
                        except:
                            break

                        # Input data and run session to find loss
                        data_test = {inputs: v_features, targets: v_labels}
                        loss_test = sess.run(loss, feed_dict=data_test)
                        test_loss.append(loss_test)

                    # Record loss
                    v_loss.append(np.mean(test_loss))
                    # Print test loss
                    print("Test loss: %.3f" % np.mean(test_loss))

                    # For the first step, set optimal loss to test loss
                    if step == 0:
                        loss_op = np.mean(test_loss)
                    # If test_loss < optimal loss, overwrite optimal loss
                    if np.mean(test_loss) < loss_op:
                        loss_op = np.mean(test_loss)
                        save_op = True  # Save model as new optimal model

                    # Print predictions and targets for reference
                    print("Predictions:")
                    print(predictions_)
                    print("Targets:")
                    print(targets_)
            # Save and overwrite the session at each training step
            saver.save(sess, save_path)
            # Save the model if loss over the test set is optimal
            if save_op:
                saver.save(sess, save_path_op)

            # Writing summaries to Tensorboard at each training step
            summ = sess.run(merged)
            writer.add_summary(summ, step)

            # Conditional statement for calculating time remaining and percent completion
            if step % 10 == 0:

                # Report percent completion
                p_completion = 100 * step / NUM_TRAINING
                print("\nPercent completion: %.3f%%" % p_completion)

                # Print time remaining
                avg_elapsed_time = (time.time() - start_time) / (step + 1)
                sec_remaining = avg_elapsed_time * (NUM_TRAINING - step)
                min_remaining = round(sec_remaining / 60)
                print("\nTime Remaining: %d minutes" % min_remaining)

                # Print learning rate if learning rate decay is used
                if DECAY_TYPE != 'none':
                    print("\nLearning Rate = ", decayed_learning_rate)
    # Write training and validation loss to file
    t_loss = np.array(t_loss)
    v_loss = np.array(v_loss)
    with open(training_loss, 'a') as file_object:
        np.savetxt(file_object, t_loss)
    with open(validation_loss, 'a') as file_object:
        np.savetxt(file_object, v_loss)

    # Close the writer
    writer.close()