diff --git a/cyclops/evaluate/metrics/experimental/__init__.py b/cyclops/evaluate/metrics/experimental/__init__.py
index c4b00ba99..7fe2361f9 100644
--- a/cyclops/evaluate/metrics/experimental/__init__.py
+++ b/cyclops/evaluate/metrics/experimental/__init__.py
@@ -19,12 +19,21 @@
 )
 from cyclops.evaluate.metrics.experimental.metric_dict import MetricDict
 from cyclops.evaluate.metrics.experimental.precision_recall import (
+    BinaryPPV,
     BinaryPrecision,
     BinaryRecall,
+    BinarySensitivity,
+    BinaryTPR,
+    MulticlassPPV,
     MulticlassPrecision,
     MulticlassRecall,
+    MulticlassSensitivity,
+    MulticlassTPR,
+    MultilabelPPV,
     MultilabelPrecision,
     MultilabelRecall,
+    MultilabelSensitivity,
+    MultilabelTPR,
 )
 from cyclops.evaluate.metrics.experimental.specificity import (
     BinarySpecificity,
diff --git a/cyclops/evaluate/metrics/experimental/functional/__init__.py b/cyclops/evaluate/metrics/experimental/functional/__init__.py
index 7a4962ea0..f3d1b4ac3 100644
--- a/cyclops/evaluate/metrics/experimental/functional/__init__.py
+++ b/cyclops/evaluate/metrics/experimental/functional/__init__.py
@@ -18,12 +18,18 @@
     multilabel_fbeta_score,
 )
 from cyclops.evaluate.metrics.experimental.functional.precision_recall import (
+    binary_ppv,
     binary_precision,
     binary_recall,
+    binary_tpr,
+    multiclass_ppv,
     multiclass_precision,
     multiclass_recall,
+    multiclass_tpr,
+    multilabel_ppv,
     multilabel_precision,
     multilabel_recall,
+    multilabel_tpr,
 )
 from cyclops.evaluate.metrics.experimental.functional.specificity import (
     binary_specificity,
diff --git a/cyclops/evaluate/metrics/experimental/functional/precision_recall.py b/cyclops/evaluate/metrics/experimental/functional/precision_recall.py
index 09d6bb343..b4411db31 100644
--- a/cyclops/evaluate/metrics/experimental/functional/precision_recall.py
+++ b/cyclops/evaluate/metrics/experimental/functional/precision_recall.py
@@ -828,3 +828,15 @@ def multilabel_recall(
         fp=fp,
         fn=fn,
     )
+
+
+# Aliases
+binary_ppv = binary_precision
+multiclass_ppv = multiclass_precision
+multilabel_ppv = multilabel_precision
+binary_sensitivity = binary_recall
+multiclass_sensitivity = multiclass_recall
+multilabel_sensitivity = multilabel_recall
+binary_tpr = binary_recall
+multiclass_tpr = multiclass_recall
+multilabel_tpr = multilabel_recall
diff --git a/cyclops/evaluate/metrics/experimental/precision_recall.py b/cyclops/evaluate/metrics/experimental/precision_recall.py
index 04f2f85a8..d704aff89 100644
--- a/cyclops/evaluate/metrics/experimental/precision_recall.py
+++ b/cyclops/evaluate/metrics/experimental/precision_recall.py
@@ -50,6 +50,40 @@ def _compute_metric(self) -> Array:
         return _binary_precision_recall_compute("precision", tp=tp, fp=fp, fn=fn)
 
 
+class BinaryPPV(BinaryPrecision, registry_key="binary_ppv"):
+    """The proportion of positive predictions that are classified correctly.
+
+    Parameters
+    ----------
+    threshold : float, default=0.5
+        Threshold for converting probabilities into binary values.
+    ignore_index : int, optional
+        Values in the target array to ignore when computing the metric.
+    **kwargs
+        Additional keyword arguments common to all metrics.
+
+    Examples
+    --------
+    >>> from cyclops.evaluate.metrics.experimental import BinaryPPV
+    >>> import numpy.array_api as anp
+    >>> target = anp.asarray([0, 1, 0, 1])
+    >>> preds = anp.asarray([0, 1, 1, 1])
+    >>> metric = BinaryPPV()
+    >>> metric(target, preds)
+    Array(0.6666667, dtype=float32)
+    >>> metric.reset()
+    >>> target = [[0, 1, 0, 1], [1, 0, 1, 0]]
+    >>> preds = [[0, 1, 1, 1], [1, 0, 1, 0]]
+    >>> for t, p in zip(target, preds):
+    ...     metric.update(anp.asarray(t), anp.asarray(p))
+    >>> metric.compute()
+    Array(0.8, dtype=float32)
+
+    """
+
+    name: str = "Positive Predictive Value"
+
+
 class MulticlassPrecision(
     _AbstractMulticlassStatScores,
     registry_key="multiclass_precision",
@@ -116,6 +150,59 @@ def _compute_metric(self) -> Array:
         )
 
 
+class MulticlassPPV(MulticlassPrecision, registry_key="multiclass_ppv"):
+    """The proportion of predicted classes that match the target classes.
+
+    Parameters
+    ----------
+    num_classes : int
+        The number of classes in the classification task.
+    top_k : int, default=1
+        The number of highest probability or logit score predictions to consider
+        when computing the positive predictive value. By default, only the top
+        prediction is considered. This parameter is ignored if `preds` contains
+        integer values.
+    average : {'micro', 'macro', 'weighted', 'none'}, optional, default='micro'
+        Specifies the type of averaging to apply to the positive predictive values.
+        Should be one of the following:
+        - `'micro'`: Compute the positive predictive value globally by considering
+          all predictions and all targets.
+        - `'macro'`: Compute the positive predictive value for each class individually
+          and then take the unweighted mean of the positive predictive values.
+        - `'weighted'`: Compute the positive predictive value for each class
+          individually and then take the mean of the positive predictive values
+          weighted by the support (the number of true positives + the number of
+          false negatives) for each class.
+        - `'none'` or `None`: Compute the positive predictive value for each class
+          individually and return the scores as an array.
+    ignore_index : int or tuple of int, optional, default=None
+        Specifies a target class that is ignored when computing the positive predictive
+        value. Ignoring a target class means that the corresponding predictions do not
+        contribute to the positive predictive value.
+
+    Examples
+    --------
+    >>> from cyclops.evaluate.metrics.experimental import MulticlassPPV
+    >>> import numpy.array_api as anp
+    >>> target = anp.asarray([0, 1, 2, 2, 2])
+    >>> preds = anp.asarray([0, 0, 2, 2, 1])
+    >>> metric = MulticlassPPV(num_classes=3)
+    >>> metric(target, preds)
+    Array(0.6, dtype=float32)
+    >>> metric.reset()
+    >>> target = [[0, 1, 2], [2, 1, 0]]
+    >>> preds = [[[0.05, 0.95, 0], [0.1, 0.8, 0.1], [0.2, 0.6, 0.2]],
+    ...     [[0.1, 0.8, 0.1], [0.05, 0.95, 0], [0.2, 0.6, 0.2]]]
+    >>> for t, p in zip(target, preds):
+    ...     metric.update(anp.asarray(t), anp.asarray(p))
+    >>> metric.compute()
+    Array(0.33333334, dtype=float32)
+
+    """
+
+    name: str = "Positive Predictive Value"
+
+
 class MultilabelPrecision(
     _AbstractMultilabelStatScores,
     registry_key="multilabel_precision",
@@ -184,6 +271,60 @@ def _compute_metric(self) -> Array:
         )
 
 
+class MultilabelPPV(MultilabelPrecision, registry_key="multilabel_ppv"):
+    """The proportion of positive predictions that are classified correctly.
+
+    Parameters
+    ----------
+    num_labels : int
+        The number of labels in the classification task.
+    threshold : float, optional, default=0.5
+        The threshold used to convert probabilities to binary values.
+    top_k : int, optional, default=1
+        The number of highest probability predictions to assign the value `1`
+        (all other predictions are assigned the value `0`). By default, only the
+        highest probability prediction is considered. This parameter is ignored
+        if `preds` does not contain floating point values.
+    average : {'micro', 'macro', 'weighted', 'none'}, optional, default='macro'
+        Specifies the type of averaging to apply to the positive predictive values.
+        Should be one of the following:
+        - `'micro'`: Compute the positive predictive value globally by considering all
+          predictions and all targets.
+        - `'macro'`: Compute the positive predictive value for each label individually
+          and then take the unweighted mean of the positive predictive values.
+        - `'weighted'`: Compute the positive predictive value for each label
+          individually and then take the mean of the positive predictive values
+          weighted by the support (the number of true positives + the number of
+          false negatives) for each label.
+        - `'none'` or `None`: Compute the positive predictive value for each label
+          individually and return the scores as an array.
+    ignore_index : int, optional, default=None
+        Specifies a value in the target array(s) that is ignored when computing
+        the positive predictive value.
+
+    Examples
+    --------
+    >>> from cyclops.evaluate.metrics.experimental import MultilabelPPV
+    >>> import numpy.array_api as anp
+    >>> target = anp.asarray([[0, 1, 1], [1, 0, 0]])
+    >>> preds = anp.asarray([[0, 1, 0], [1, 0, 1]])
+    >>> metric = MultilabelPPV(num_labels=3)
+    >>> metric(target, preds)
+    Array(0.6666667, dtype=float32)
+    >>> metric.reset()
+    >>> target = [[[0, 1, 1], [1, 0, 0]], [[1, 0, 0], [0, 1, 1]]]
+    >>> preds = [[[0.05, 0.95, 0], [0.1, 0.8, 0.1]],
+    ...     [[0.1, 0.8, 0.1], [0.05, 0.95, 0]]]
+    >>> for t, p in zip(target, preds):
+    ...     metric.update(anp.asarray(t), anp.asarray(p))
+    >>> metric.compute()
+    Array(0.16666667, dtype=float32)
+
+    """
+
+    name: str = "Positive Predictive Value"
+
+
 class BinaryRecall(_AbstractBinaryStatScores, registry_key="binary_recall"):
     """The proportion of positive predictions that are classified correctly.
 
@@ -223,6 +364,74 @@ def _compute_metric(self) -> Array:
         return _binary_precision_recall_compute("recall", tp=tp, fp=fp, fn=fn)
 
 
+class BinarySensitivity(BinaryRecall, registry_key="binary_sensitivity"):
+    """The proportion of actual positives that are correctly identified.
+
+    Parameters
+    ----------
+    threshold : float, default=0.5
+        Threshold for converting probabilities into binary values.
+    ignore_index : int, optional
+        Values in the target array to ignore when computing the metric.
+    **kwargs
+        Additional keyword arguments common to all metrics.
+
+    Examples
+    --------
+    >>> from cyclops.evaluate.metrics.experimental import BinarySensitivity
+    >>> import numpy.array_api as anp
+    >>> target = anp.asarray([0, 1, 0, 1])
+    >>> preds = anp.asarray([0, 1, 1, 1])
+    >>> metric = BinarySensitivity()
+    >>> metric(target, preds)
+    Array(1., dtype=float32)
+    >>> metric.reset()
+    >>> target = [[0, 1, 0, 1], [1, 0, 1, 0]]
+    >>> preds = [[0, 1, 1, 1], [1, 0, 1, 0]]
+    >>> for t, p in zip(target, preds):
+    ...     metric.update(anp.asarray(t), anp.asarray(p))
+    >>> metric.compute()
+    Array(1., dtype=float32)
+
+    """
+
+    name: str = "Sensitivity Score"
+
+
+class BinaryTPR(BinaryRecall, registry_key="binary_tpr"):
+    """The proportion of actual positives that are correctly identified.
+
+    Parameters
+    ----------
+    threshold : float, default=0.5
+        Threshold for converting probabilities into binary values.
+    ignore_index : int, optional
+        Values in the target array to ignore when computing the metric.
+    **kwargs
+        Additional keyword arguments common to all metrics.
+
+    Examples
+    --------
+    >>> from cyclops.evaluate.metrics.experimental import BinaryTPR
+    >>> import numpy.array_api as anp
+    >>> target = anp.asarray([0, 1, 0, 1])
+    >>> preds = anp.asarray([0, 1, 1, 1])
+    >>> metric = BinaryTPR()
+    >>> metric(target, preds)
+    Array(1., dtype=float32)
+    >>> metric.reset()
+    >>> target = [[0, 1, 0, 1], [1, 0, 1, 0]]
+    >>> preds = [[0, 1, 1, 1], [1, 0, 1, 0]]
+    >>> for t, p in zip(target, preds):
+    ...     metric.update(anp.asarray(t), anp.asarray(p))
+    >>> metric.compute()
+    Array(1., dtype=float32)
+
+    """
+
+    name: str = "True Positive Rate"
+
+
 class MulticlassRecall(_AbstractMulticlassStatScores, registry_key="multiclass_recall"):
     """The proportion of predicted classes that match the target classes.
 
@@ -286,6 +495,110 @@ def _compute_metric(self) -> Array:
         )
 
 
+class MulticlassSensitivity(MulticlassRecall, registry_key="multiclass_sensitivity"):
+    """The proportion of target classes that are correctly predicted.
+
+    Parameters
+    ----------
+    num_classes : int
+        The number of classes in the classification task.
+    top_k : int, default=1
+        The number of highest probability or logit score predictions to consider
+        when computing the sensitivity score. By default, only the top prediction is
+        considered. This parameter is ignored if `preds` contains integer values.
+    average : {'micro', 'macro', 'weighted', 'none'}, optional, default='micro'
+        Specifies the type of averaging to apply to the sensitivity scores. Should
+        be one of the following:
+        - `'micro'`: Compute the sensitivity score globally by considering all
+          predictions and all targets.
+        - `'macro'`: Compute the sensitivity score for each class individually
+          and then take the unweighted mean of the sensitivity scores.
+        - `'weighted'`: Compute the sensitivity score for each class individually
+          and then take the mean of the sensitivity scores weighted by the support
+          (the number of true positives + the number of false negatives) for
+          each class.
+        - `'none'` or `None`: Compute the sensitivity score for each class individually
+          and return the scores as an array.
+    ignore_index : int or tuple of int, optional, default=None
+        Specifies a target class that is ignored when computing the sensitivity score.
+        Ignoring a target class means that the corresponding predictions do not
+        contribute to the sensitivity score.
+
+    Examples
+    --------
+    >>> from cyclops.evaluate.metrics.experimental import MulticlassSensitivity
+    >>> import numpy.array_api as anp
+    >>> target = anp.asarray([0, 1, 2, 2, 2])
+    >>> preds = anp.asarray([0, 0, 2, 2, 1])
+    >>> metric = MulticlassSensitivity(num_classes=3)
+    >>> metric(target, preds)
+    Array(0.6, dtype=float32)
+    >>> metric.reset()
+    >>> target = [[0, 1, 2], [2, 1, 0]]
+    >>> preds = [[[0.05, 0.95, 0], [0.1, 0.8, 0.1], [0.2, 0.6, 0.2]],
+    ...     [[0.1, 0.8, 0.1], [0.05, 0.95, 0], [0.2, 0.6, 0.2]]]
+    >>> for t, p in zip(target, preds):
+    ...     metric.update(anp.asarray(t), anp.asarray(p))
+    >>> metric.compute()
+    Array(0.33333334, dtype=float32)
+
+    """
+
+    name: str = "Sensitivity Score"
+
+
+class MulticlassTPR(MulticlassRecall, registry_key="multiclass_tpr"):
+    """The proportion of target classes that are correctly predicted.
+
+    Parameters
+    ----------
+    num_classes : int
+        The number of classes in the classification task.
+    top_k : int, default=1
+        The number of highest probability or logit score predictions to consider
+        when computing the true positive rate. By default, only the top prediction is
+        considered. This parameter is ignored if `preds` contains integer values.
+    average : {'micro', 'macro', 'weighted', 'none'}, optional, default='micro'
+        Specifies the type of averaging to apply to the true positive rates. Should
+        be one of the following:
+        - `'micro'`: Compute the true positive rate globally by considering all
+          predictions and all targets.
+        - `'macro'`: Compute the true positive rate for each class individually
+          and then take the unweighted mean of the true positive rates.
+        - `'weighted'`: Compute the true positive rate for each class individually
+          and then take the mean of the true positive rates weighted by the support
+          (the number of true positives + the number of false negatives) for
+          each class.
+        - `'none'` or `None`: Compute the true positive rate for each class individually
+          and return the scores as an array.
+    ignore_index : int or tuple of int, optional, default=None
+        Specifies a target class that is ignored when computing the true positive rate.
+        Ignoring a target class means that the corresponding predictions do not
+        contribute to the true positive rate.
+
+    Examples
+    --------
+    >>> from cyclops.evaluate.metrics.experimental import MulticlassTPR
+    >>> import numpy.array_api as anp
+    >>> target = anp.asarray([0, 1, 2, 2, 2])
+    >>> preds = anp.asarray([0, 0, 2, 2, 1])
+    >>> metric = MulticlassTPR(num_classes=3)
+    >>> metric(target, preds)
+    Array(0.6, dtype=float32)
+    >>> metric.reset()
+    >>> target = [[0, 1, 2], [2, 1, 0]]
+    >>> preds = [[[0.05, 0.95, 0], [0.1, 0.8, 0.1], [0.2, 0.6, 0.2]],
+    ...     [[0.1, 0.8, 0.1], [0.05, 0.95, 0], [0.2, 0.6, 0.2]]]
+    >>> for t, p in zip(target, preds):
+    ...     metric.update(anp.asarray(t), anp.asarray(p))
+    >>> metric.compute()
+    Array(0.33333334, dtype=float32)
+
+    """
+
+    name: str = "True Positive Rate"
+
+
 class MultilabelRecall(_AbstractMultilabelStatScores, registry_key="multilabel_recall"):
     """The proportion of positive predictions that are classified correctly.
 
@@ -349,3 +662,111 @@ def _compute_metric(self) -> Array:
             fp=fp,
             fn=fn,
         )
+
+
+class MultilabelSensitivity(MultilabelRecall, registry_key="multilabel_sensitivity"):
+    """The proportion of actual positives that are correctly identified.
+
+    Parameters
+    ----------
+    num_labels : int
+        The number of labels in the classification task.
+    threshold : float, optional, default=0.5
+        The threshold used to convert probabilities to binary values.
+    top_k : int, optional, default=1
+        The number of highest probability predictions to assign the value `1`
+        (all other predictions are assigned the value `0`). By default, only the
+        highest probability prediction is considered. This parameter is ignored
+        if `preds` does not contain floating point values.
+    average : {'micro', 'macro', 'weighted', 'none'}, optional, default='macro'
+        Specifies the type of averaging to apply to the sensitivity scores. Should
+        be one of the following:
+        - `'micro'`: Compute the sensitivity score globally by considering all
+          predictions and all targets.
+        - `'macro'`: Compute the sensitivity score for each label individually
+          and then take the unweighted mean of the sensitivity scores.
+        - `'weighted'`: Compute the sensitivity score for each label individually
+          and then take the mean of the sensitivity scores weighted by the support
+          (the number of true positives + the number of false negatives) for each
+          label.
+        - `'none'` or `None`: Compute the sensitivity score for each label individually
+          and return the scores as an array.
+    ignore_index : int, optional, default=None
+        Specifies a value in the target array(s) that is ignored when computing
+        the sensitivity score.
+
+    Examples
+    --------
+    >>> from cyclops.evaluate.metrics.experimental import MultilabelSensitivity
+    >>> import numpy.array_api as anp
+    >>> target = anp.asarray([[0, 1, 1], [1, 0, 0]])
+    >>> preds = anp.asarray([[0, 1, 0], [1, 0, 1]])
+    >>> metric = MultilabelSensitivity(num_labels=3)
+    >>> metric(target, preds)
+    Array(0.6666667, dtype=float32)
+    >>> metric.reset()
+    >>> target = [[[0, 1, 1], [1, 0, 0]], [[1, 0, 0], [0, 1, 1]]]
+    >>> preds = [[[0.05, 0.95, 0], [0.1, 0.8, 0.1]],
+    ...     [[0.1, 0.8, 0.1], [0.05, 0.95, 0]]]
+    >>> for t, p in zip(target, preds):
+    ...     metric.update(anp.asarray(t), anp.asarray(p))
+    >>> metric.compute()
+    Array(0.33333334, dtype=float32)
+
+    """
+
+    name: str = "Sensitivity Score"
+
+
+class MultilabelTPR(MultilabelRecall, registry_key="multilabel_tpr"):
+    """The proportion of actual positives that are correctly identified.
+
+    Parameters
+    ----------
+    num_labels : int
+        The number of labels in the classification task.
+    threshold : float, optional, default=0.5
+        The threshold used to convert probabilities to binary values.
+    top_k : int, optional, default=1
+        The number of highest probability predictions to assign the value `1`
+        (all other predictions are assigned the value `0`). By default, only the
+        highest probability prediction is considered. This parameter is ignored
+        if `preds` does not contain floating point values.
+    average : {'micro', 'macro', 'weighted', 'none'}, optional, default='macro'
+        Specifies the type of averaging to apply to the true positive rates. Should
+        be one of the following:
+        - `'micro'`: Compute the true positive rate globally by considering all
+          predictions and all targets.
+        - `'macro'`: Compute the true positive rate for each label individually
+          and then take the unweighted mean of the true positive rates.
+        - `'weighted'`: Compute the true positive rate for each label individually
+          and then take the mean of the true positive rates weighted by the support
+          (the number of true positives + the number of false negatives) for each
+          label.
+        - `'none'` or `None`: Compute the true positive rate for each label individually
+          and return the scores as an array.
+    ignore_index : int, optional, default=None
+        Specifies a value in the target array(s) that is ignored when computing
+        the true positive rate.
+
+    Examples
+    --------
+    >>> from cyclops.evaluate.metrics.experimental import MultilabelTPR
+    >>> import numpy.array_api as anp
+    >>> target = anp.asarray([[0, 1, 1], [1, 0, 0]])
+    >>> preds = anp.asarray([[0, 1, 0], [1, 0, 1]])
+    >>> metric = MultilabelTPR(num_labels=3)
+    >>> metric(target, preds)
+    Array(0.6666667, dtype=float32)
+    >>> metric.reset()
+    >>> target = [[[0, 1, 1], [1, 0, 0]], [[1, 0, 0], [0, 1, 1]]]
+    >>> preds = [[[0.05, 0.95, 0], [0.1, 0.8, 0.1]],
+    ...     [[0.1, 0.8, 0.1], [0.05, 0.95, 0]]]
+    >>> for t, p in zip(target, preds):
+    ...     metric.update(anp.asarray(t), anp.asarray(p))
+    >>> metric.compute()
+    Array(0.33333334, dtype=float32)
+
+    """
+
+    name: str = "True Positive Rate"
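Usage note (illustrative sketch, not part of the patch): each new class and functional name is a thin alias for its precision/recall counterpart, so swapping in PPV/sensitivity/TPR should leave every score unchanged. The snippet below assumes the class-based API shown in the docstrings above and that the functional aliases accept (target, preds) like binary_precision and binary_recall:

    import numpy.array_api as anp
    from cyclops.evaluate.metrics.experimental import BinaryPPV, BinaryPrecision
    from cyclops.evaluate.metrics.experimental.functional import binary_ppv, binary_tpr

    target = anp.asarray([0, 1, 0, 1])
    preds = anp.asarray([0, 1, 1, 1])

    # BinaryPPV subclasses BinaryPrecision, so both report the same value (2/3 here).
    print(BinaryPPV()(target, preds), BinaryPrecision()(target, preds))

    # binary_ppv is bound to binary_precision and binary_tpr to binary_recall
    # (see the alias block in functional/precision_recall.py), so the functional
    # results match the class-based metrics as well.
    print(binary_ppv(target, preds), binary_tpr(target, preds))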