-
Notifications
You must be signed in to change notification settings - Fork 0
/
myMetric.py
109 lines (89 loc) · 4.05 KB
/
myMetric.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import numpy as np
import torch
import sacrebleu
import pyter
from bert_score import score
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.metrics import confusion_matrix
'''
Classification metrics.
args: preds, labels -- 1-D arrays of class labels (e.g. np.argmax over per-class scores)
'''
def classification_metrics(preds, labels):
    """Compute accuracy, precision, recall and F1 for predicted class labels.

    Args:
        preds: Predicted class labels, one per sample.
        labels: Ground-truth class labels, aligned with ``preds``.

    Returns:
        dict with the keys ``"accuracy"``, ``"precision"``, ``"recall"``,
        ``"f1_micro"`` and ``"f1_macro"``. Precision and recall use
        ``average='weighted'`` for binary and multi-class input alike.

    Any error raised by scikit-learn is propagated to the caller instead of
    being swallowed.
    """
    # accuracy_score takes no `average` argument; passing one always raised a
    # TypeError that was then hidden by a bare `except`.
    accuracy = accuracy_score(y_true=labels, y_pred=preds)

    # The weighted average is valid for binary targets too, so no fallback
    # call is needed.
    precision = precision_score(y_true=labels, y_pred=preds, average="weighted")
    recall = recall_score(y_true=labels, y_pred=preds, average="weighted")

    f1_macro = f1_score(y_true=labels, y_pred=preds, average="macro")
    f1_micro = f1_score(y_true=labels, y_pred=preds, average="micro")

    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_micro": f1_micro,
        "f1_macro": f1_macro,
    }
'''
Text generation metrics.
args: hyp (and ref, where taken) -- List[str], one sentence per element, index-aligned
'''
def bleu_score(hyp, ref):
    """Return the corpus-level BLEU score of ``hyp`` against ``ref``.

    Args:
        hyp: List of hypothesis sentences.
        ref: List of reference sentences; it is handed to sacrebleu as a
            single reference stream.

    Returns:
        The ``score`` attribute of the result of ``sacrebleu.corpus_bleu``.
    """
    reference_streams = [ref]
    return sacrebleu.corpus_bleu(hyp, reference_streams).score
def ter_score(hyp, ref):
    """Return the mean sentence-level TER over paired hypotheses/references.

    Args:
        hyp: List of hypothesis sentences.
        ref: List of reference sentences, paired with ``hyp`` by position.

    Returns:
        The arithmetic mean of ``pyter.ter`` computed on the
        whitespace-split tokens of each (hypothesis, reference) pair.
    """
    per_sentence = [
        pyter.ter(hypothesis.split(), reference.split())
        for hypothesis, reference in zip(hyp, ref)
    ]
    return np.mean(np.array(per_sentence))
def bert_score(hyp, ref):
    """Return the mean BERTScore precision, recall and F1 for ``hyp`` vs ``ref``.

    Args:
        hyp: List of hypothesis sentences.
        ref: List of reference sentences.

    Returns:
        Tuple ``(precision, recall, f1)``, each the mean of the per-sentence
        values returned by ``bert_score.score``.
    """
    score_kwargs = {"lang": "others", "verbose": True}
    # Only name a device explicitly when CUDA is available; otherwise let
    # the library pick its default.
    if torch.cuda.is_available():
        score_kwargs["device"] = torch.cuda.current_device()

    per_sentence = score(hyp, ref, **score_kwargs)
    precision, recall, f1 = (
        np.mean(values.numpy().tolist()) for values in per_sentence
    )
    return precision, recall, f1
def selfbleu_score(hyp):
    """Return Self-BLEU: the mean sentence BLEU over all ordered pairs in ``hyp``.

    Every sentence is scored as a hypothesis against every *other* sentence
    as its single reference, and the scores are averaged over the
    ``n * (n - 1)`` ordered pairs.

    Args:
        hyp: List of generated sentences.

    Returns:
        The average pairwise sentence-BLEU score as a float. Returns ``0.0``
        when fewer than two sentences are given, because there is no pair
        to compare (previously this divided by zero).
    """
    count = len(hyp)
    if count < 2:
        return 0.0

    total = 0.0
    for idx, candidate in enumerate(hyp):
        for other_idx, other in enumerate(hyp):
            if other_idx == idx:
                # A sentence is never compared with itself.
                continue
            # sentence_bleu expects a sequence of reference strings, so the
            # single reference is wrapped in a list.
            total += sacrebleu.sentence_bleu(candidate, [other]).score

    return total / (count * (count - 1))
if __name__ == "__main__":
    # Smoke-test demo: run every metric once on small hand-written inputs.

    # Classification: per-class scores are reduced to predicted labels.
    multi_scores = np.array([
        [0.001, 0.002, 0.003],
        [0.999, 0.888, 0.777],
        [0.123, 0.321, 0.213],
    ])
    multi_preds = multi_scores.argmax(axis=1)
    multi_labels = np.array([2, 1, 0])

    bi_scores = np.array([
        [0.001, 0.002],
        [0.999, 0.888],
        [0.123, 0.321],
    ])
    bi_preds = bi_scores.argmax(axis=1)
    bi_labels = np.array([1, 1, 0])

    print(f"Multi: {classification_metrics(multi_preds, multi_labels)}")
    print(f"Binary: {classification_metrics(bi_preds, bi_labels)}")

    # Text generation: three hypothesis/reference sentence pairs.
    hyp = [
        "Abcde",
        "Applications of Natural Language Processing in the Real World",
        "As the accuracy of learning has improved and a common language of vectors has been created, an environment has been created in which information from other media (such as images) and language can be handled in an integrated manner in the neural world.",
    ]
    ref = [
        "Abcde",
        "Utilization of natural language processing in the real world",
        "With the improvement of learning accuracy and the creation of a common language called vectors, we have created an environment in which information from other media (images, etc.) and language information can be handled in an integrated manner in the neural world.",
    ]

    print(f"BLEU: {bleu_score(hyp,ref)}")
    print(f"TER: {ter_score(hyp,ref)}")
    print(f"BERTScore: {bert_score(hyp,ref)}")
    print(f"selfBLEU: {selfbleu_score(hyp)}")