eval_utils.py
# encoding=utf8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# import sys
# reload(sys)
# sys.setdefaultencoding('utf8')
import os
import json
import hashlib
import pandas as pd
import time
import logging
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
from torch.utils.data import DataLoader
import misc.utils as utils
from vist_eval.album_eval import AlbumEvaluator


class Evaluator:
    """Evaluates generated visual stories against the VIST references and writes predictions to disk."""

    def __init__(self, save_dir, mode='val', vt_mode=1, beam_size=3):
        self.vt_mode = vt_mode
        ref_json_path = './vist_reference/{}_reference_m{}.json'.format(mode, self.vt_mode)
        self.save_dir = save_dir
        self.beam_size = beam_size
        if not os.path.exists(self.save_dir):
            os.makedirs(self.save_dir)
        with open(ref_json_path) as f:
            self.reference = json.load(f)
        print("\nloading reference file {}".format(ref_json_path))
        self.prediction_file = os.path.join(self.save_dir, 'prediction_{}'.format(mode))
        print('prediction file: {}\n'.format(self.prediction_file))
        self.eval = AlbumEvaluator()
    def measure(self, predictions):
        """Run album-level evaluation and return the overall metric scores."""
        # # test on first batch
        # # tmpkeys = list(predictions.keys())
        # tmpkeys = ['666831', '72157631530976322']
        # tmpref = {}
        # tmppre = {}
        # for k in tmpkeys:
        #     tmpref[k] = self.reference[k]
        #     tmppre[k] = self.predictions[k]
        # self.reference = tmpref
        # self.predictions = tmppre
        # album-level evaluation
        self.eval.evaluate(self.reference, predictions)
        return self.eval.eval_overall
    def eval_story(self, model, topicModel, crit, dataset, loader, iteration, ifTest=False):
        """Compute validation loss, decode stories with beam search, and return (loss, predictions, metrics)."""
        # Make sure we are in evaluation mode
        print("Evaluating...")
        start = time.time()
        model.eval()
        if ifTest:
            dataset.test()
        else:
            dataset.val()
        loss_sum = 0
        loss_evals = 0
        predictions = {}
        self.prediction_file = os.path.join(self.save_dir, 'prediction_val_{}'.format(iteration))
        # open the file to store the predictions
        count = 0
        for iter, batch in enumerate(loader):
            iter_start = time.time()
            feature_fc = Variable(batch['feature_fc']).cuda()
            target = Variable(batch['split_story']).cuda()
            # keywords = Variable(batch['keywords']).cuda()
            conv_feature = Variable(batch['feature_conv']).cuda() if 'feature_conv' in batch else None
            with torch.no_grad():
                keywords = topicModel(feature_fc)
            count += feature_fc.size(0)
            with torch.no_grad():
                output = model(feature_fc, keywords, target)
                loss = crit(output, target).item()
            loss_sum += loss
            loss_evals += 1
            # forward the model to also get generated samples for each album
            results, _ = model.predict(feature_fc, keywords, beam_size=self.beam_size)
            sents = utils.decode_story(dataset.get_vocab(), results)
            indexes = batch['index'].numpy()
            for j, story in enumerate(sents):
                if self.vt_mode == 1:  # 1: album_id, 2: joined flickr_id
                    id = dataset.get_aid(indexes[j])
                else:
                    id = dataset.get_fid(indexes[j])
                if id not in predictions:
                    predictions[id] = [story]
            print("Evaluate iter {}/{} {:04.2f}%. Time used: {}".format(iter,
                                                                        len(loader),
                                                                        iter * 100.0 / len(loader),
                                                                        time.time() - iter_start))
        metrics = self.measure(predictions)  # compute all the language metrics
        # write predictions to json
        json_prediction_file = '{}.json'.format(self.prediction_file)
        with open(json_prediction_file, 'w') as f:
            json.dump(predictions, f)
        # Switch back to training mode
        model.train()
        dataset.train()
        print("Evaluation finished. Evaluated {} samples. Time used: {}".format(count, time.time() - start))
        return loss_sum / loss_evals, predictions, metrics
    def test_story(self, model, topicModel, dataset, loader, hamming_diversity, hamming_f, hamming_n):
        """Decode stories on the test split with beam search and dump predictions and metric scores to JSON."""
        # output filenames
        if hamming_diversity:
            self.prediction_file = os.path.join(self.save_dir, 'prediction_test_hamming_n{}_f{}'.format(hamming_n, hamming_f))
            self.prediction_score_file = os.path.join(self.save_dir, 'test_scores_hamming_n{}_f{}.json'.format(hamming_n, hamming_f))
        else:
            self.prediction_file = os.path.join(self.save_dir, 'prediction_test_nohamming')
            self.prediction_score_file = os.path.join(self.save_dir, 'test_scores_nohamming.json')
        print("Evaluating...")
        start = time.time()
        model.eval()
        dataset.test()
        predictions = {}
        # keywords = {}
        # # print one story's beam-search process
        # batch = dataset.get_by_fid('5694501782_5693928729_5694502126_5693929211_5694502472')
        # feature_fc = torch.tensor(batch['feature_fc']).unsqueeze(0).cuda()
        # keywords = topicModel(feature_fc)
        # results, _ = model.predict(feature_fc, keywords, beam_size=self.beam_size)
        # sents = utils.decode_story(dataset.get_vocab(), results)
        # print()
        # print(sents)
        # return
        for iter, batch in enumerate(loader):
            iter_start = time.time()
            feature_fc = Variable(batch['feature_fc']).cuda()
            keywords = topicModel(feature_fc)
            results, _ = model.predict(feature_fc, keywords, beam_size=self.beam_size)
            # sents = utils.decode_story_with_keywords(dataset.get_vocab(), results, results_kwords)
            sents = utils.decode_story(dataset.get_vocab(), results)
            indexes = batch['index'].numpy()
            # for j, (story, kwords) in enumerate(sents):
            for j, story in enumerate(sents):
                if self.vt_mode == 1:
                    id = dataset.get_aid(indexes[j])
                else:
                    id = dataset.get_fid(indexes[j])
                if id not in predictions:
                    predictions[id] = [story]
            print("Evaluate iter {}/{} {:04.2f}%. Time used: {}".format(iter,
                                                                        len(loader),
                                                                        iter * 100.0 / len(loader),
                                                                        time.time() - iter_start))
        metrics = self.measure(predictions)  # compute all the language metrics
        # write predictions and scores to json
        # for id in predictions:
        #     predictions[id].insert(0, keywords[id])
        json_prediction_file = '{}.json'.format(self.prediction_file)
        with open(json_prediction_file, 'w') as f:
            json.dump(predictions, f)
        with open(self.prediction_score_file, 'w') as f:
            json.dump(metrics, f)
        print("Test finished. Time used: {}".format(time.time() - start))
        return predictions, metrics
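

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module). A minimal, commented-out
# example of how the Evaluator above might be driven from a training or test
# script. The names `model`, `topic_model`, `criterion`, `dataset`, `loader`,
# and the paths are hypothetical stand-ins for the project's own objects; only
# `Evaluator` and its methods come from this file.
# ---------------------------------------------------------------------------
# evaluator = Evaluator(save_dir='./checkpoints/eval', mode='val',
#                       vt_mode=1, beam_size=3)
#
# # During training, run validation periodically (e.g. at iteration 1000):
# val_loss, predictions, metrics = evaluator.eval_story(
#     model, topic_model, criterion, dataset, loader, iteration=1000)
# print('val loss: {:.4f}'.format(val_loss))
# print('metrics: {}'.format(metrics))
#
# # For final testing, optionally with Hamming-diversity beam search:
# predictions, metrics = evaluator.test_story(
#     model, topic_model, dataset, loader,
#     hamming_diversity=True, hamming_f=1.0, hamming_n=2)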