From 3bcdf8f3db90d13742c45df76b182ec6339e6246 Mon Sep 17 00:00:00 2001
From: dimakarp1996
Date: Tue, 31 Jan 2023 22:58:31 +0300
Subject: [PATCH 1/5] Added options for printing the confusion matrix and for
 handling multilabel outputs in the F-measure metrics.

---
 deeppavlov/metrics/fmeasure.py | 71 +++++++++++++++++++++++++++-------
 1 file changed, 58 insertions(+), 13 deletions(-)

diff --git a/deeppavlov/metrics/fmeasure.py b/deeppavlov/metrics/fmeasure.py
index 442d3d5262..8a032f0d5f 100644
--- a/deeppavlov/metrics/fmeasure.py
+++ b/deeppavlov/metrics/fmeasure.py
@@ -203,15 +203,13 @@ def round_f1(y_true, y_predicted):
     return f1_score(y_true, predictions)
 
 
-@register_metric('f1_macro')
-def round_f1_macro(y_true, y_predicted):
-    """
-    Calculates F1 macro measure.
+def _f1_macro_weighted(y_true, y_predicted, print_matrix=False, average='macro'):
+    """
+    Helper that calculates macro- and weighted-averaged F1; used by the registered F1 metrics.
 
     Args:
         y_true: list of true values
         y_predicted: list of predicted values
-
     Returns:
         F1 score
     """
@@ -219,28 +217,75 @@ def round_f1_macro(y_true, y_predicted):
         predictions = [np.round(x) for x in y_predicted]
     except TypeError:
         predictions = y_predicted
+    if not len(y_true) and not len(y_predicted):
+        # y_true and y_predicted are empty lists; this can happen in the multitask setting
+        return -1
+    if print_matrix and all(isinstance(k, list) for k in y_true):
+        mlb = MultiLabelBinarizer(sparse_output=False)
+        mlb.fit(y_true + y_predicted)
+        y_true_binarized = mlb.transform(y_true)
+        y_predicted_binarized = mlb.transform(y_predicted)
+        f_score = f1_score(np.array(y_true_binarized), np.array(y_predicted_binarized), average=average)
+        if print_matrix:
+            print(multilabel_confusion_matrix(np.array(y_true_binarized), np.array(y_predicted_binarized)).tolist())
+    else:
+        f_score = f1_score(np.array(y_true), np.array(predictions), average=average)
+        if print_matrix:
+            print(confusion_matrix(np.array(y_true), np.array(predictions)).tolist())
+    return f_score
 
-    return f1_score(np.array(y_true), np.array(predictions), average="macro")
+@register_metric('f1_macro_with_confusion_matrix')
+def round_f1_macro_with_confusion_matrix(y_true, y_predicted):
+    """
+    Calculates F1 macro measure and prints the confusion matrix.
+    Args:
+        y_true: list of true values
+        y_predicted: list of predicted values
+    Returns:
+        F1 score
+    """
+    return _f1_macro_weighted(y_true, y_predicted, print_matrix=True, average='macro')
+
+
+@register_metric('f1_macro')
+def round_f1_macro(y_true, y_predicted):
+    """
+    Calculates F1 macro measure.
+    Args:
+        y_true: list of true values
+        y_predicted: list of predicted values
+    Returns:
+        F1 score
+    """
+    return _f1_macro_weighted(y_true, y_predicted, print_matrix=False, average='macro')
+
+
+@register_metric('f1_weighted_with_confusion_matrix')
+def round_f1_weighted_with_confusion_matrix(y_true, y_predicted):
+    """
+    Calculates F1 weighted measure and prints the confusion matrix.
+    Args:
+        y_true: list of true values
+        y_predicted: list of predicted values
+    Returns:
+        F1 score
+    """
+    return _f1_macro_weighted(y_true, y_predicted, print_matrix=True, average='weighted')
+
 
 
 @register_metric('f1_weighted')
 def round_f1_weighted(y_true, y_predicted):
     """
     Calculates F1 weighted measure.
-
     Args:
         y_true: list of true values
         y_predicted: list of predicted values
-
     Returns:
         F1 score
     """
-    try:
-        predictions = [np.round(x) for x in y_predicted]
-    except TypeError:
-        predictions = y_predicted
+    return _f1_macro_weighted(y_true, y_predicted, print_matrix=False, average='weighted')
 
-    return f1_score(np.array(y_true), np.array(predictions), average="weighted")
 
 
 def chunk_finder(current_token, previous_token, tag):

From 4c88f3ade68d127493b6c57f3e12d631fc527a9c Mon Sep 17 00:00:00 2001
From: dimakarp1996
Date: Mon, 13 Feb 2023 14:52:08 +0300
Subject: [PATCH 2/5] Update metrics_registry.json

---
 deeppavlov/core/common/metrics_registry.json | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/deeppavlov/core/common/metrics_registry.json b/deeppavlov/core/common/metrics_registry.json
index c1f1a6c7a0..bb6119c4ca 100644
--- a/deeppavlov/core/common/metrics_registry.json
+++ b/deeppavlov/core/common/metrics_registry.json
@@ -8,7 +8,9 @@
     "elmo_loss2ppl": "deeppavlov.metrics.elmo_metrics:elmo_loss2ppl",
     "f1": "deeppavlov.metrics.fmeasure:round_f1",
     "f1_macro": "deeppavlov.metrics.fmeasure:round_f1_macro",
+    "f1_macro_with_confusion_matrix": "deeppavlov.metrics.fmeasure:round_f1_macro_with_confusion_matrix",
     "f1_weighted": "deeppavlov.metrics.fmeasure:round_f1_weighted",
+    "f1_weighted_with_confusion_matrix": "deeppavlov.metrics.fmeasure:round_f1_weighted_with_confusion_matrix",
     "google_bleu": "deeppavlov.metrics.bleu:google_bleu",
     "kbqa_accuracy": "deeppavlov.metrics.accuracy:kbqa_accuracy",
     "log_loss": "deeppavlov.metrics.log_loss:sk_log_loss",
@@ -40,4 +42,4 @@
     "squad_v2_f1": "deeppavlov.metrics.squad_metrics:squad_v2_f1",
     "record_f1_score": "deeppavlov.metrics.record_metrics:record_f1_score",
     "record_em_score": "deeppavlov.metrics.record_metrics:record_em_score"
-}
\ No newline at end of file
+}

From 39da2bd80d0a61564f88c52b26a19398cd30c41c Mon Sep 17 00:00:00 2001
From: dimakarp1996
Date: Mon, 13 Feb 2023 15:09:18 +0300
Subject: [PATCH 3/5] Update fmeasure.py

---
 deeppavlov/metrics/fmeasure.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/deeppavlov/metrics/fmeasure.py b/deeppavlov/metrics/fmeasure.py
index 8a032f0d5f..6e82065f48 100644
--- a/deeppavlov/metrics/fmeasure.py
+++ b/deeppavlov/metrics/fmeasure.py
@@ -18,7 +18,8 @@
 from logging import getLogger
 
 import numpy as np
-from sklearn.metrics import f1_score
+from sklearn.metrics import f1_score, confusion_matrix
+from sklearn.preprocessing import MultiLabelBinarizer
 
 from deeppavlov.core.common.metrics_registry import register_metric
 

From 237bdd5b447727338264c654da17c77156ccdc05 Mon Sep 17 00:00:00 2001
From: dimakarp1996
Date: Tue, 21 Feb 2023 19:59:02 +0300
Subject: [PATCH 4/5] Update fmeasure.py

---
 deeppavlov/metrics/fmeasure.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deeppavlov/metrics/fmeasure.py b/deeppavlov/metrics/fmeasure.py
index 6e82065f48..c2b1233cb1 100644
--- a/deeppavlov/metrics/fmeasure.py
+++ b/deeppavlov/metrics/fmeasure.py
@@ -18,7 +18,7 @@
 from logging import getLogger
 
 import numpy as np
-from sklearn.metrics import f1_score, confusion_matrix
+from sklearn.metrics import f1_score, confusion_matrix, multilabel_confusion_matrix
 from sklearn.preprocessing import MultiLabelBinarizer
 
 from deeppavlov.core.common.metrics_registry import register_metric

From 8d12825169844e7eb47db6f1b27239335d0e415d Mon Sep 17 00:00:00 2001
From: dimakarp1996
Date: Tue, 21 Feb 2023 20:00:49 +0300
Subject: [PATCH 5/5] Update fmeasure.py

---
 deeppavlov/metrics/fmeasure.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deeppavlov/metrics/fmeasure.py b/deeppavlov/metrics/fmeasure.py
index c2b1233cb1..e43b9a204b 100644
--- a/deeppavlov/metrics/fmeasure.py
+++ b/deeppavlov/metrics/fmeasure.py
@@ -221,7 +221,7 @@ def _f1_macro_weighted(y_true, y_predicted, print_matrix=False, average='macro'):
     if not len(y_true) and not len(y_predicted):
         # y_true and y_predicted are empty lists; this can happen in the multitask setting
         return -1
-    if print_matrix and all(isinstance(k, list) for k in y_true):
+    if all(isinstance(k, list) for k in y_true):
         mlb = MultiLabelBinarizer(sparse_output=False)
         mlb.fit(y_true + y_predicted)
         y_true_binarized = mlb.transform(y_true)
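
Note: after patch 5/5, every registered F1 variant routes through _f1_macro_weighted, and inputs whose elements are lists always take the multilabel path, whether or not the confusion matrix is printed. The following standalone sketch reproduces that multilabel path outside DeepPavlov; the sample labels are invented for illustration, and only f1_score, multilabel_confusion_matrix, and MultiLabelBinarizer from scikit-learn are assumed:

    # Sketch of the multilabel branch of _f1_macro_weighted (illustrative inputs).
    import numpy as np
    from sklearn.metrics import f1_score, multilabel_confusion_matrix
    from sklearn.preprocessing import MultiLabelBinarizer

    y_true = [['sports'], ['politics', 'sports'], ['weather']]
    y_predicted = [['sports'], ['politics'], ['weather']]

    mlb = MultiLabelBinarizer(sparse_output=False)
    mlb.fit(y_true + y_predicted)            # learn the label set from both sides
    y_true_bin = mlb.transform(y_true)       # (n_samples, n_classes) indicator matrix
    y_pred_bin = mlb.transform(y_predicted)

    # Same calls the patched metric makes for average='macro' with print_matrix=True:
    print(f1_score(np.array(y_true_bin), np.array(y_pred_bin), average='macro'))
    # One 2x2 [[tn, fp], [fn, tp]] matrix per class, as the metric prints it:
    print(multilabel_confusion_matrix(np.array(y_true_bin), np.array(y_pred_bin)).tolist())

Once the new names are registered in metrics_registry.json (patch 2/5), they should be usable wherever metric names appear in a config, e.g. "metrics": ["f1_macro_with_confusion_matrix"]; this usage follows the registry convention rather than anything shown explicitly in the patches.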