compute_metrics.py
"""Program for computing multi-label metrics (Hamming loss, precision/recall/F1) for a given decoding result."""
from sklearn import metrics
import numpy as np

dataset_dir = "runs//AAPD//dualTransV4Ori//2019-12-06...20.38.48//iteration_39000/"
dataset_name = 'AAPD'
# Optional suffix with detailed information for this result, appended to the file names.
detail_info = ""

def fopen():
    """Open the gold and prediction files and return two List[List[str]] label sequences."""
    # file_total_label = dataset_name + "/label_train"
    file_test = dataset_dir + "/" + dataset_name + ".gold" + detail_info + ".txt"
    file_predict = dataset_dir + "/" + dataset_name + detail_info + ".txt"
    # with open(file_total_label, "r", encoding="utf-8") as reader:
    #     total_labels = reader.readlines()
    with open(file_test, "r", encoding="utf-8") as reader:
        references = reader.readlines()
    with open(file_predict, "r", encoding="utf-8") as reader:
        hypotheses = reader.readlines()
    # Normalize the gold labels: map the digits 1-9 to the letters A-I, delete
    # '.', '"' and '-', then lower-case. The mapping is per character, so a
    # multi-digit label such as "10" becomes "a0".
    trans_table = str.maketrans('123456789', 'ABCDEFGHI', '."-')
    refers = [reference.translate(trans_table).lower().split() for reference in references]
    hypos = [hypothesis.split() for hypothesis in hypotheses]
    # hypos = [list(set(hypothesis.split())) for hypothesis in hypotheses]
    return refers, hypos
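
# Illustrative sketch (not part of the original script): the normalization above
# turns a gold line such as "1 3 9\n" into plain letter tokens, e.g.
#
#     >>> table = str.maketrans('123456789', 'ABCDEFGHI', '."-')
#     >>> "1 3 9\n".translate(table).lower().split()
#     ['a', 'c', 'i']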

def create_idx(labels):
    """Create a label-to-index dict from a list of label sequences."""
    idx = 0
    l2i_dict = {}
    for label in labels:
        # label = label.split()
        for la in label:
            if la not in l2i_dict:
                l2i_dict[la] = idx
                idx += 1
    return l2i_dict
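
# For example (hypothetical labels, not from the original data): labels are
# indexed in first-seen order and duplicates are skipped, so
#
#     >>> create_idx([['a', 'c'], ['c', 'b']])
#     {'a': 0, 'c': 1, 'b': 2}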

def tag2Idx(tags, l2i_dict):
    """Return the indices of the given tags."""
    return [l2i_dict[tag] for tag in tags]
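
# Continuing the hypothetical example above:
#
#     >>> tag2Idx(['c', 'a'], {'a': 0, 'c': 1, 'b': 2})
#     [1, 0]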

def compute_hamming_loss(references, hypotheses, tgt_dictionary):
    """Given decoding results and reference label sequences, compute corpus-level
    multi-label metrics: Hamming loss plus macro/micro precision, recall and F1.
    @param references: (List[List[str]]), a list of gold-standard reference label sequences
    @param hypotheses: (List[List[str]]), a list of hypotheses, one for each reference
    @param tgt_dictionary: (Dict[str, int]), a dictionary mapping labels to indices
    @returns results: (Dict[str, float]), the computed metrics
    """
    def sentence_ids_to_multi_ones_hot_vector(y, dictionary):
        total_length = len(dictionary)
        # np.int was removed in NumPy 1.24; use the builtin int as the dtype.
        ones_hot = np.zeros(total_length, dtype=int)
        # Ignore tokens that are not in the dictionary ('<pad>', '<s>', '</s>',
        # '<unk>', or labels never seen in the references); looking them up
        # directly would raise a KeyError.
        hot_indices = tag2Idx([tag for tag in y if tag in dictionary], dictionary)
        ones_hot[hot_indices] = 1
        return ones_hot

    def sentences_ids_to_multi_ones_hot_vectors(ys, dictionary):
        return np.array([sentence_ids_to_multi_ones_hot_vector(y, dictionary) for y in ys],
                        dtype=int)

    references_ones_hot_vectors = sentences_ids_to_multi_ones_hot_vectors(references, tgt_dictionary)
    hypotheses_ones_hot_vectors = sentences_ids_to_multi_ones_hot_vectors(hypotheses, tgt_dictionary)
    hamming_loss = metrics.hamming_loss(references_ones_hot_vectors, hypotheses_ones_hot_vectors)
    macro_f1 = metrics.f1_score(references_ones_hot_vectors, hypotheses_ones_hot_vectors, average='macro')
    macro_precision = metrics.precision_score(references_ones_hot_vectors, hypotheses_ones_hot_vectors, average='macro')
    macro_recall = metrics.recall_score(references_ones_hot_vectors, hypotheses_ones_hot_vectors, average='macro')
    micro_f1 = metrics.f1_score(references_ones_hot_vectors, hypotheses_ones_hot_vectors, average='micro')
    micro_precision = metrics.precision_score(references_ones_hot_vectors, hypotheses_ones_hot_vectors, average='micro')
    micro_recall = metrics.recall_score(references_ones_hot_vectors, hypotheses_ones_hot_vectors, average='micro')
    results = dict(hamming_loss=round(hamming_loss, 4),
                   macro_f1=round(macro_f1, 3),
                   macro_precision=round(macro_precision, 3),
                   macro_recall=round(macro_recall, 3),
                   micro_f1=round(micro_f1, 3),
                   micro_precision=round(micro_precision, 3),
                   micro_recall=round(micro_recall, 3))
    return results
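
# A minimal worked example (hypothetical labels, not from the original data):
# with dictionary {'a': 0, 'b': 1, 'c': 2}, the reference ['a', 'c'] encodes to
# [1, 0, 1] and the hypothesis ['a', 'b'] to [1, 1, 0]. The two vectors disagree
# in 2 of 3 positions, so the Hamming loss of this single pair is 2/3, e.g.
#
#     >>> compute_hamming_loss([['a', 'c']], [['a', 'b']],
#     ...                      {'a': 0, 'b': 1, 'c': 2})['hamming_loss']
#     0.6667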

if __name__ == "__main__":
    references, hypotheses = fopen()
    l2i_dict = create_idx(references)
    # print("-----l2i_dict-----\n", l2i_dict)
    results = compute_hamming_loss(references, hypotheses, l2i_dict)
    print("-----Result-----\n", results)