-
Notifications
You must be signed in to change notification settings - Fork 3
/
train.py
137 lines (115 loc) · 6.01 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import os
import numpy as np
import torch
from torch.autograd import Variable
from torch.utils.data.dataloader import DataLoader
from data_gen import BabiDataset, pad_collate
from models import DMNPlus
from utils import parse_args
def _evaluate(model, loader):
    """Return mean accuracy of ``model`` over every batch in ``loader``.

    Runs in eval mode under ``torch.no_grad()`` — no gradients are needed
    for validation/test, so this avoids building autograd graphs.
    """
    model.eval()
    total_acc = 0
    cnt = 0
    with torch.no_grad():
        for contexts, questions, answers in loader:
            batch_size = contexts.size()[0]
            contexts = contexts.long().cuda()
            questions = questions.long().cuda()
            answers = answers.cuda()
            # get_loss returns (loss, accuracy); only accuracy is needed here.
            _, acc = model.get_loss(contexts, questions, answers)
            total_acc += acc * batch_size
            cnt += batch_size
    return total_acc / cnt


def train_net(args):
    """Train DMN+ on all 20 bAbI tasks, repeated for 10 independent runs.

    For each (run, task) pair: trains for up to 256 epochs with early
    stopping (patience of 20 epochs without a validation-accuracy
    improvement, or stop immediately on perfect validation accuracy),
    then restores the best validation checkpoint, evaluates it on the
    test split, saves the weights under ``models/`` and appends results
    to ``log.txt``.

    Args:
        args: parsed CLI namespace; must provide ``hidden_size`` and
            ``batch_size``. Requires a CUDA device.
    """
    # Fixed seeds so runs are reproducible.
    torch.manual_seed(7)
    np.random.seed(7)
    for run in range(10):
        for task_id in range(1, 21):
            dset = BabiDataset(task_id)
            vocab_size = len(dset.QA.VOCAB)
            model = DMNPlus(args.hidden_size, vocab_size, num_hop=3, qa=dset.QA)
            model.cuda()
            early_stopping_cnt = 0
            early_stopping_flag = False
            best_acc = 0
            # Snapshot the initial weights so `best_state` is always bound,
            # even if no epoch ever improves on `best_acc` (the original code
            # could hit an unbound `best_state` at test time in that case).
            best_state = model.state_dict()
            optim = torch.optim.Adam(model.parameters())
            for epoch in range(256):
                dset.set_mode('train')
                train_loader = DataLoader(
                    dset, batch_size=args.batch_size, shuffle=True, collate_fn=pad_collate
                )
                model.train()
                if not early_stopping_flag:
                    total_acc = 0
                    cnt = 0
                    for batch_idx, data in enumerate(train_loader):
                        optim.zero_grad()
                        contexts, questions, answers = data
                        batch_size = contexts.size()[0]
                        # Deprecated Variable() wrappers removed: plain tensors
                        # carry autograd state since PyTorch 0.4.
                        contexts = contexts.long().cuda()
                        questions = questions.long().cuda()
                        answers = answers.cuda()
                        loss, acc = model.get_loss(contexts, questions, answers)
                        loss.backward()
                        total_acc += acc * batch_size
                        cnt += batch_size
                        if batch_idx % 20 == 0:
                            print('[Task {}, Epoch {}] [Training] loss : {}, acc : {:.4f}, batch_idx : {}'.format(
                                task_id, epoch, loss.item(), total_acc / cnt, batch_idx))
                        optim.step()

                    dset.set_mode('valid')
                    valid_loader = DataLoader(
                        dset, batch_size=args.batch_size, shuffle=False, collate_fn=pad_collate
                    )
                    total_acc = _evaluate(model, valid_loader)
                    if total_acc > best_acc:
                        best_acc = total_acc
                        best_state = model.state_dict()
                        early_stopping_cnt = 0
                    else:
                        early_stopping_cnt += 1
                        # Patience of 20 epochs without improvement.
                        if early_stopping_cnt > 20:
                            early_stopping_flag = True
                    print('[Run {}, Task {}, Epoch {}] [Validate] Accuracy : {:.4f}'.format(run, task_id, epoch,
                                                                                           total_acc))
                    with open('log.txt', 'a') as fp:
                        fp.write('[Run {}, Task {}, Epoch {}] [Validate] Accuracy : {:.4f}\n'.format(
                            run, task_id, epoch, total_acc))
                    # Perfect validation accuracy: nothing left to learn.
                    if total_acc == 1.0:
                        break
                else:
                    print('[Run {}, Task {}] Early Stopping at Epoch {}, Valid Accuracy : {:.4f}'.format(
                        run, task_id, epoch, best_acc))
                    break

            dset.set_mode('test')
            test_loader = DataLoader(
                dset, batch_size=args.batch_size, shuffle=False, collate_fn=pad_collate
            )
            # Restore the best validation checkpoint ONCE before testing
            # (the original reloaded it inside every test batch).
            model.load_state_dict(best_state)
            test_acc = _evaluate(model, test_loader)
            print('[Run {}, Task {}, Epoch {}] [Test] Accuracy : {:.4f}'.format(run, task_id, epoch, test_acc))
            os.makedirs('models', exist_ok=True)
            with open('models/task{}_epoch{}_run{}_acc{:.4f}.pth'.format(task_id, epoch, run, test_acc), 'wb') as fp:
                torch.save(model.state_dict(), fp)
            with open('log.txt', 'a') as fp:
                # Bug fix: the original wrote `total_acc` (the last VALIDATION
                # accuracy) into this [Test] log line instead of the test accuracy.
                fp.write(
                    '[Run {}, Task {}, Epoch {}] [Test] Accuracy : {:.4f}\n'.format(run, task_id, epoch, test_acc))
def main():
    """Parse command-line options and launch the full training sweep."""
    global args  # bound at module level so other code can inspect the parsed args
    args = parse_args()
    train_net(args)


if __name__ == '__main__':
    main()