-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathevaluation_score.py
executable file
·72 lines (65 loc) · 3.09 KB
/
evaluation_score.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from bert_score import score
from nlgmetricverse import NLGMetricverse,load_metric
import pickle , os, json
## calculate scores
def calculate_scores(predictions, gts):
    """Compute caption metrics for a set of predictions against ground truth.

    Args:
        predictions: dict mapping sample name -> predicted caption string.
        gts: dict mapping sample name -> reference caption(s).

    Returns:
        dict with keys "bleu_1", "bleu_4", "rouge" (ROUGE-L), "cider",
        and "bertscore" (corpus-mean BERTScore F1).
    """
    metrics = [
        load_metric("bleu", resulting_name="bleu_1", compute_kwargs={"max_order": 1}),
        load_metric("bleu", resulting_name="bleu_4", compute_kwargs={"max_order": 4}),
        load_metric("rouge"),
        load_metric("cider"),
    ]
    Evaluator = NLGMetricverse(metrics)
    # Pair each prediction with its own reference by key. Sorting the two
    # dicts independently (the previous approach) silently mis-pairs values
    # whenever the key sets differ, corrupting every metric below.
    common_keys = sorted(set(predictions) & set(gts))
    predictions = [predictions[k] for k in common_keys]
    gts = [gts[k] for k in common_keys]
    scores = Evaluator(predictions=predictions, references=gts)
    score_results = {}
    score_results["bleu_1"] = scores["bleu_1"]['score']
    score_results["bleu_4"] = scores["bleu_4"]['score']
    score_results["rouge"] = scores["rouge"]['rougeL']
    score_results["cider"] = scores["cider"]['score']
    # BERTScore F1, IDF-weighted and rescaled against the English baseline;
    # reported as the mean over all pairs.
    P, R, F1 = score(predictions, gts, lang="en", verbose=False, idf=True, rescale_with_baseline=True)
    score_results["bertscore"] = F1.mean().item()
    return score_results
def gts(pkl_file="/home/weihsin/datasets/FigureSkate/HumanML3D_l/local_human_test.pkl"):
    """Load ground-truth labels from a pickled list of records.

    Args:
        pkl_file: path to a pickle file holding a list of dicts, each with
            'video_name' and 'labels' keys. Defaults to the original
            hard-coded dataset path for backward compatibility.

    Returns:
        dict mapping video_name -> labels, with the 'standard' reference
        entry excluded.
    """
    ground_truth = {}
    with open(pkl_file, 'rb') as f:
        data_list = pickle.load(f)
    for item in data_list:
        # 'standard' is the reference video, not an evaluation sample
        if item['video_name'] == 'standard':
            continue
        ground_truth[item['video_name']] = item['labels']
    return ground_truth
def main(folder_path="/home/weihsin/projects/MotionExpert_tmp/MotionExpert/STAGCN_output_local_new",
         output_path="pretrained_metrics_2.json"):
    """Score every results_epoch*.json file in *folder_path* and save a report.

    Each matching JSON file maps sample name -> predicted caption. Scores are
    computed against the pickled ground truth from gts(), ranked by BERTScore
    (best first), and dumped to *output_path*.

    Args:
        folder_path: directory containing results_epochN.json prediction files.
            Defaults to the original hard-coded path for backward compatibility.
        output_path: destination JSON report. Defaults to the original filename.
    """
    ground_truth = gts()
    all_scores = {}
    for file_name in os.listdir(folder_path):
        # only model-output files named like results_epochN.json
        if not (file_name.startswith('results_epoch') and file_name.endswith('.json')):
            continue
        file_path = os.path.join(folder_path, file_name)
        predictions = {}
        with open(file_path, 'r') as f:
            json_data = json.load(f)
        for k, v in json_data.items():
            if k == 'standard':
                print("standard")
                continue
            # strip the prompt prefix so only the instruction text is scored
            if 'Motion Instruction : ' in v:
                v = v.replace('Motion Instruction : ', '')
            predictions[k] = v
        all_scores[file_name] = calculate_scores(predictions, ground_truth)
    # rank epochs by BERTScore, best first, then persist all metrics
    all_scores = dict(sorted(all_scores.items(), key=lambda item: item[1]['bertscore'], reverse=True))
    with open(output_path, 'w') as f:
        json.dump(all_scores, f, indent=4)

if __name__ == "__main__":
    main()