-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtrain_assoc_recall_task.py
103 lines (75 loc) · 2.67 KB
/
train_assoc_recall_task.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import os
from datetime import datetime
import numpy as np
import tensorflow as tf
from src.tf.ntm import NTM
# Single model instance shared by every function in this script. Its 18
# external outputs are reshaped to (6, 3) in train_step/eval (6 * 3 == 18).
ntm = NTM(external_output_size=18)

# Loss and optimizer live at module level so that training and evaluation
# use the same objects. from_logits=True: the loss is fed the raw model
# outputs, no sigmoid is applied beforehand.
loss_object = tf.keras.losses.BinaryCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Nadam()

# Resume from an earlier run when its checkpoint directory is present.
if os.path.exists('./assoc_model'):
    print('loading weights')
    ntm.load_weights('assoc_model/weights')

# One summary directory per launch, keyed by the launch timestamp.
current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = 'logs/' + current_time + '/train'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
def train_step(batch):
    """Apply one optimizer update computed from a batch of sequences.

    Every sequence is presented to the module-level ``ntm`` item by item,
    starting from a fresh start state. One item is then picked at random as
    the query and the model has to output the item that followed it in the
    sequence. The per-sequence losses are averaged and a single gradient
    step is taken on that mean.

    Args:
        batch: iterable of sequences; each sequence is an indexable
            collection of arrays that can be reshaped against (6, 3)
            predictions (see ``get_batch``).
    """
    per_sequence_losses = []
    with tf.GradientTape() as tape:
        for sequence in batch:
            # Presentation phase: outputs are discarded, only the state is
            # carried from one item to the next.
            memory_state = ntm.get_start_state()
            for element in sequence:
                _, memory_state = ntm(tf.convert_to_tensor(element), memory_state)

            # Indices are drawn from [0, len - 3], so the last two entries
            # are never used as the query and ``cue_index + 1`` never
            # reaches the final entry of the sequence.
            cue_index = np.random.randint(len(sequence) - 2)
            cue, target = sequence[cue_index], sequence[cue_index + 1]

            logits, _ = ntm(cue, memory_state)
            logits = tf.reshape(logits, shape=(6, 3))
            per_sequence_losses.append(loss_object(target, logits))

        # Reduced inside the tape so the mean itself is differentiable.
        batch_loss = tf.reduce_mean(per_sequence_losses)

    trainable = ntm.trainable_variables
    gradients = tape.gradient(batch_loss, trainable)
    optimizer.apply_gradients(zip(gradients, trainable))
def eval(val_set, i, min_loss, query_seed=0):
    """Measure the recall loss on ``val_set`` and checkpoint on improvement.

    Each validation sequence is presented to the module-level ``ntm`` from a
    fresh start state, one item is used as the query, and the prediction is
    scored against the item that followed the query. The mean loss is
    written to the summary writer, printed, and - when it is lower than
    ``min_loss`` - the model weights are saved to ``assoc_model/weights``.

    The query positions are drawn from a private generator seeded with
    ``query_seed`` instead of the global ``np.random`` stream. With a fixed
    validation set this makes the losses of successive calls directly
    comparable (a lower value reflects the weights, not a luckier draw of
    queries), and evaluation no longer shifts the random stream that
    generates the training batches.

    NOTE(review): the function name shadows the ``eval`` builtin; it is kept
    because callers refer to it by this name.

    Args:
        val_set: iterable of sequences, same layout as a training batch.
        i: step value under which the loss is recorded in the summary.
        min_loss: best (lowest) validation loss seen so far.
        query_seed: seed of the generator that picks the query positions.

    Returns:
        The updated best loss: the new loss if it is lower than
        ``min_loss``, otherwise ``min_loss`` unchanged.
    """
    query_rng = np.random.RandomState(query_seed)

    losses = []
    for seq in val_set:
        state = ntm.get_start_state()
        for item in seq:
            x = tf.convert_to_tensor(item)
            _, state = ntm(x, state)

        # Same index range as in training: the last two entries of the
        # sequence are never used as the query.
        query_i = query_rng.randint(len(seq) - 2)
        query = seq[query_i]
        y_true = seq[query_i + 1]
        pred, _ = ntm(query, state)
        pred = tf.reshape(pred, shape=(6, 3))

        losses.append(loss_object(y_true, pred))

    # Convert the tensor to a host scalar once and reuse it below.
    loss_value = tf.reduce_mean(losses).numpy()

    with train_summary_writer.as_default():
        tf.summary.scalar('eval_loss', loss_value, step=i)
    print(i, loss_value)

    if loss_value < min_loss:
        min_loss = loss_value
        ntm.save_weights('assoc_model/weights', save_format='tf')
    return min_loss
def get_batch(size, max_len=6, item_shape=(6, 3)):
    """Generate random sequences for the associative-recall task.

    Every sequence consists of between 2 and ``max_len`` (inclusive) random
    binary items followed by one delimiter item whose entries are all -1.
    Random numbers are taken from the global ``np.random`` stream, first the
    length of a sequence and then its items, sequence by sequence.

    Args:
        size: number of sequences to generate.
        max_len: largest number of binary items per sequence; at least 2,
            so that every sequence has an item with a successor to query.
        item_shape: shape of every item, delimiter included.

    Returns:
        A list of ``size`` sequences. Each sequence is a list of integer
        0/1 arrays of shape ``item_shape`` ending with a float array of
        the same shape filled with -1.

    Raises:
        ValueError: if ``max_len`` is smaller than 2.
    """
    if max_len < 2:
        raise ValueError(
            'max_len must be at least 2, got {}'.format(max_len))

    batch = []
    for _ in range(size):
        length = np.random.randint(2, max_len + 1)
        seq = [np.random.randint(2, size=item_shape) for _ in range(length)]
        # Each sequence gets its own delimiter array, so mutating one
        # sequence cannot leak into another.
        seq.append(np.full(item_shape, -1.0))
        batch.append(seq)
    return batch
def train(steps=100000, batch_size=10, val_size=100, eval_every=10,
          target_loss=1e-3):
    """Train the model until the validation loss target or step limit is hit.

    A validation set is generated once up front. Every iteration draws a
    fresh training batch and applies one update; every ``eval_every``
    iterations (including iteration 0) the model is evaluated, and training
    stops as soon as the best validation loss drops below ``target_loss``.
    The defaults reproduce the original hard-coded schedule.

    Args:
        steps: maximum number of training iterations.
        batch_size: number of sequences per training batch.
        val_size: number of sequences in the validation set.
        eval_every: evaluate once per this many iterations.
        target_loss: best validation loss below which training stops.
    """
    min_loss = float('inf')
    val_set = get_batch(val_size)

    for step in range(steps):
        train_step(get_batch(batch_size))

        if step % eval_every != 0:
            continue

        # The summary step counts training sequences consumed rather than
        # optimizer updates.
        min_loss = eval(val_set, step * batch_size, min_loss)
        if min_loss < target_loss:
            break
# Start training only when the file is run as a script, not when imported.
if __name__ == '__main__':
    train()