From 1ebf921a05fe4e6d70ea01733f4e884786a01335 Mon Sep 17 00:00:00 2001 From: Franklin <41602287+fcogidi@users.noreply.github.com> Date: Fri, 2 Feb 2024 11:04:43 -0500 Subject: [PATCH 01/14] Add Matthews Correlation Coefficient (MCC) metric --- .../evaluate/metrics/experimental/__init__.py | 5 + .../experimental/functional/__init__.py | 5 + .../functional/matthews_corr_coef.py | 350 ++++++++++++++++ .../experimental/matthews_corr_coef.py | 187 +++++++++ .../experimental/test_matthews_corr_coef.py | 378 ++++++++++++++++++ 5 files changed, 925 insertions(+) create mode 100644 cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py create mode 100644 cyclops/evaluate/metrics/experimental/matthews_corr_coef.py create mode 100644 tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py diff --git a/cyclops/evaluate/metrics/experimental/__init__.py b/cyclops/evaluate/metrics/experimental/__init__.py index 3a5b9974a..51ab7a21e 100644 --- a/cyclops/evaluate/metrics/experimental/__init__.py +++ b/cyclops/evaluate/metrics/experimental/__init__.py @@ -29,6 +29,11 @@ ) from cyclops.evaluate.metrics.experimental.mae import MeanAbsoluteError from cyclops.evaluate.metrics.experimental.mape import MeanAbsolutePercentageError +from cyclops.evaluate.metrics.experimental.matthews_corr_coef import ( + BinaryMCC, + MulticlassMCC, + MultilabelMCC, +) from cyclops.evaluate.metrics.experimental.metric_dict import MetricDict from cyclops.evaluate.metrics.experimental.mse import MeanSquaredError from cyclops.evaluate.metrics.experimental.negative_predictive_value import ( diff --git a/cyclops/evaluate/metrics/experimental/functional/__init__.py b/cyclops/evaluate/metrics/experimental/functional/__init__.py index 1a2e5902b..56b7e825e 100644 --- a/cyclops/evaluate/metrics/experimental/functional/__init__.py +++ b/cyclops/evaluate/metrics/experimental/functional/__init__.py @@ -31,6 +31,11 @@ from cyclops.evaluate.metrics.experimental.functional.mape import ( mean_absolute_percentage_error, ) +from cyclops.evaluate.metrics.experimental.functional.matthews_corr_coef import ( + binary_mcc, + multiclass_mcc, + multilabel_mcc, +) from cyclops.evaluate.metrics.experimental.functional.mse import mean_squared_error from cyclops.evaluate.metrics.experimental.functional.negative_predictive_value import ( binary_npv, diff --git a/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py b/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py new file mode 100644 index 000000000..5bb34375f --- /dev/null +++ b/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py @@ -0,0 +1,350 @@ +"""Functional API for the matthews correlation coefficient (MCC) metric.""" +from typing import Optional, Tuple, Union + +import array_api_compat as apc + +from cyclops.evaluate.metrics.experimental.functional.confusion_matrix import ( + _binary_confusion_matrix_compute, + _binary_confusion_matrix_format_arrays, + _binary_confusion_matrix_update_state, + _binary_confusion_matrix_validate_args, + _binary_confusion_matrix_validate_arrays, + _multiclass_confusion_matrix_format_arrays, + _multiclass_confusion_matrix_update_state, + _multiclass_confusion_matrix_validate_args, + _multiclass_confusion_matrix_validate_arrays, + _multilabel_confusion_matrix_compute, + _multilabel_confusion_matrix_format_arrays, + _multilabel_confusion_matrix_update_state, + _multilabel_confusion_matrix_validate_args, + _multilabel_confusion_matrix_validate_arrays, +) +from cyclops.evaluate.metrics.experimental.utils.types import Array + + +def _mcc_reduce(confmat: Array) -> Array: + """Reduce an un-normalized confusion matrix into the matthews corrcoef.""" + xp = apc.array_namespace(confmat) + # convert multilabel into binary + confmat = xp.sum(confmat, axis=0) if confmat.ndim == 3 else confmat + + if int(apc.size(confmat) or 0) == 4: # binary case + tn, fp, fn, tp = xp.reshape(xp.astype(confmat, xp.float32), (-1,)) + if tp + tn != 0 and fp + fn == 0: + return xp.asarray(1.0, dtype=xp.float32, device=apc.device(confmat)) # type: ignore[no-any-return] + + if tp + tn == 0 and fp + fn != 0: + return xp.asarray(-1.0, dtype=xp.float32, device=apc.device(confmat)) # type: ignore[no-any-return] + + tk = xp.sum(confmat, axis=-1, dtype=xp.float32) + pk = xp.sum(confmat, axis=-2, dtype=xp.float32) + c = xp.astype(xp.linalg.trace(confmat), xp.float32) + s = xp.sum(confmat, dtype=xp.float32) + + cov_ytyp = c * s - sum(tk * pk) + cov_ypyp = s**2 - sum(pk * pk) + cov_ytyt = s**2 - sum(tk * tk) + + numerator = cov_ytyp + denom = cov_ypyp * cov_ytyt + + if denom == 0 and int(apc.size(confmat) or 0) == 4: + if tp == 0 or tn == 0: + a = tp + tn + + if fp == 0 or fn == 0: + b = fp + fn + + eps = xp.asarray( + xp.finfo(xp.float32).eps, + dtype=xp.float32, + device=apc.device(confmat), + ) + numerator = xp.sqrt(eps) * (a - b) + denom = (tp + fp + eps) * (tp + fn + eps) * (tn + fp + eps) * (tn + fn + eps) + elif denom == 0: + return xp.asarray(0.0, dtype=xp.float32, device=apc.device(confmat)) # type: ignore[no-any-return] + return numerator / xp.sqrt(denom) # type: ignore[no-any-return] + + +def binary_mcc( + target: Array, + preds: Array, + threshold: float = 0.5, + ignore_index: Optional[int] = None, +) -> Array: + """Compute the matthews correlation coefficient for binary classification. + + Parameters + ---------- + target : Array + An array object that is compatible with the Python array API standard + and contains the ground truth labels. The expected shape of the array + is `(N, ...)`, where `N` is the number of samples. + preds : Array + An array object that is compatible with the Python array API standard and + contains the predictions of a binary classifier. the expected shape of the + array is `(N, ...)` where `N` is the number of samples. If `preds` contains + floating point values that are not in the range `[0, 1]`, a sigmoid function + will be applied to each value before thresholding. + threshold : float, default=0.5 + The threshold to use when converting probabilities to binary predictions. + ignore_index : int, optional, default=None + Specifies a target value that is ignored and does not contribute to the + metric. If `None`, ignore nothing. + + Returns + ------- + Array + The matthews correlation coefficient. + + Raises + ------ + ValueError + If `target` and `preds` have different shapes. + ValueError + If `target` and `preds` are not array-API-compatible. + ValueError + If `target` or `preds` are empty. + ValueError + If `target` or `preds` are not numeric arrays. + ValueError + If `threshold` is not a float in the [0,1] range. + ValueError + If `normalize` is not one of `'pred'`, `'true'`, `'all'`, `'none'`, or `None`. + ValueError + If `ignore_index` is not `None` or an integer. + + Examples + -------- + >>> import numpy.array_api as anp + >>> from cyclops.evaluate.metrics.experimental.functional import binary_mcc + >>> target = anp.asarray([0, 1, 0, 1, 0, 1]) + >>> preds = anp.asarray([0, 0, 1, 1, 0, 1]) + >>> binary_mcc(target, preds) + Array(0.33333334, dtype=float32) + >>> target = anp.asarray([0, 1, 0, 1, 0, 1]) + >>> preds = anp.asarray([0.11, 0.22, 0.84, 0.73, 0.33, 0.92]) + >>> binary_mcc(target, preds) + Array(0.33333334, dtype=float32) + + """ + _binary_confusion_matrix_validate_args( + threshold=threshold, + normalize=None, + ignore_index=ignore_index, + ) + xp = _binary_confusion_matrix_validate_arrays(target, preds, ignore_index) + + target, preds = _binary_confusion_matrix_format_arrays( + target, + preds, + threshold, + ignore_index, + xp=xp, + ) + tn, fp, fn, tp = _binary_confusion_matrix_update_state(target, preds, xp=xp) + + confmat = _binary_confusion_matrix_compute(tn, fp, fn, tp, normalize=None) + return _mcc_reduce(confmat) + + +def multiclass_mcc( + target: Array, + preds: Array, + num_classes: int, + ignore_index: Optional[Union[int, Tuple[int]]] = None, +) -> Array: + """Compute the matthews correlation coefficient for multiclass classification. + + Parameters + ---------- + target : Array + The target array of shape `(N, ...)`, where `N` is the number of samples. + preds : Array + The prediction array with shape `(N, ...)`, for integer inputs, or + `(N, C, ...)`, for float inputs, where `N` is the number of samples and + `C` is the number of classes. + num_classes : int + The number of classes. + ignore_index : int, Tuple[int], optional, default=None + Specifies a target value(s) that is ignored and does not contribute to the + metric. If `None`, ignore nothing. + + Returns + ------- + Array + The matthews correlation coefficient. + + Raises + ------ + ValueError + If `target` and `preds` are not array-API-compatible. + ValueError + If `target` or `preds` are empty. + ValueError + If `target` or `preds` are not numeric arrays. + ValueError + If `num_classes` is not an integer larger than 1. + ValueError + If `normalize` is not one of `'pred'`, `'true'`, `'all'`, `'none'`, or `None`. + ValueError + If `ignore_index` is not `None`, an integer or a tuple of integers. + ValueError + If `preds` contains floats but `target` does not have one dimension less than + `preds`. + ValueError + If the second dimension of `preds` is not equal to `num_classes`. + ValueError + If when `target` has one dimension less than `preds`, the shape of `preds` is + not `(N, C, ...)` while the shape of `target` is `(N, ...)`. + ValueError + If when `target` and `preds` have the same number of dimensions, they + do not have the same shape. + RuntimeError + If `target` contains values that are not in the range [0, `num_classes`). + + Examples + -------- + >>> import numpy.array_api as anp + >>> from cyclops.evaluate.metrics.experimental.functional import multiclass_mcc + >>> target = anp.asarray([2, 1, 0, 0]) + >>> preds = anp.asarray([2, 1, 0, 1]) + >>> multiclass_mcc(target, preds, num_classes=3) + Array(0.7, dtype=float32) + >>> target = anp.asarray([2, 1, 0, 0]) + >>> preds = anp.asarray([[0.16, 0.26, 0.58], + ... [0.22, 0.61, 0.17], + ... [0.71, 0.09, 0.20], + ... [0.05, 0.82, 0.13]]) + >>> multiclass_mcc(target, preds, num_classes=3) + Array(0.7, dtype=float32) + + """ + _multiclass_confusion_matrix_validate_args( + num_classes, + normalize=None, + ignore_index=ignore_index, + ) + xp = _multiclass_confusion_matrix_validate_arrays( + target, + preds, + num_classes, + ignore_index=ignore_index, + ) + + target, preds = _multiclass_confusion_matrix_format_arrays( + target, + preds, + ignore_index=ignore_index, + xp=xp, + ) + confmat = _multiclass_confusion_matrix_update_state( + target, + preds, + num_classes, + xp=xp, + ) + return _mcc_reduce(confmat) + + +def multilabel_mcc( + target: Array, + preds: Array, + num_labels: int, + threshold: float = 0.5, + ignore_index: Optional[int] = None, +) -> Array: + """Compute the matthews correlation coefficient for multilabel classification. + + Parameters + ---------- + target : Array + The target array of shape `(N, L, ...)`, where `N` is the number of samples + and `L` is the number of labels. + preds : Array + The prediction array of shape `(N, L, ...)`, where `N` is the number of + samples and `L` is the number of labels. If `preds` contains floats that + are not in the range [0,1], they will be converted to probabilities using + the sigmoid function. + num_labels : int + The number of labels. + threshold : float, default=0.5 + The threshold to use for binarizing the predictions. + ignore_index : int, optional, default=None + Specifies a target value that is ignored and does not contribute to the + metric. If `None`, ignore nothing. + + Returns + ------- + Array + The matthews correlation coefficient. + + Raises + ------ + ValueError + If `target` and `preds` are not array-API-compatible. + ValueError + If `target` or `preds` are empty. + ValueError + If `target` or `preds` are not numeric arrays. + ValueError + If `threshold` is not a float in the [0,1] range. + ValueError + If `normalize` is not one of `'pred'`, `'true'`, `'all'`, `'none'`, or `None`. + ValueError + If `ignore_index` is not `None` or a non-negative integer. + ValueError + If `num_labels` is not an integer larger than 1. + ValueError + If `target` and `preds` do not have the same shape. + ValueError + If the second dimension of `preds` is not equal to `num_labels`. + RuntimeError + If `target` contains values that are not in the range [0, 1]. + + Examples + -------- + >>> import numpy.array_api as anp + >>> from cyclops.evaluate.metrics.experimental.functional import multilabel_mcc + >>> target = anp.asarray([[0, 1, 0], [1, 0, 1]]) + >>> preds = anp.asarray([[0, 0, 1], [1, 0, 1]]) + >>> multilabel_mcc(target, preds, num_labels=3) + Array(0.33333334, dtype=float32) + >>> target = anp.asarray([[0, 1, 0], [1, 0, 1]]) + >>> preds = anp.asarray([[0.11, 0.22, 0.84], [0.73, 0.33, 0.92]]) + >>> multilabel_mcc(target, preds, num_labels=3) + Array(0.33333334, dtype=float32) + + """ + _multilabel_confusion_matrix_validate_args( + num_labels, + threshold=threshold, + normalize=None, + ignore_index=ignore_index, + ) + xp = _multilabel_confusion_matrix_validate_arrays( + target, + preds, + num_labels, + ignore_index=ignore_index, + ) + + target, preds = _multilabel_confusion_matrix_format_arrays( + target, + preds, + threshold=threshold, + ignore_index=ignore_index, + xp=xp, + ) + tn, fp, fn, tp = _multilabel_confusion_matrix_update_state(target, preds, xp=xp) + + confmat = _multilabel_confusion_matrix_compute( + tn, + fp, + fn, + tp, + num_labels, + normalize=None, + ) + return _mcc_reduce(confmat) diff --git a/cyclops/evaluate/metrics/experimental/matthews_corr_coef.py b/cyclops/evaluate/metrics/experimental/matthews_corr_coef.py new file mode 100644 index 000000000..804f3a857 --- /dev/null +++ b/cyclops/evaluate/metrics/experimental/matthews_corr_coef.py @@ -0,0 +1,187 @@ +"""Matthews Correlation Coefficient (MCC) metric.""" +from typing import Any, Optional, Tuple, Union + +from cyclops.evaluate.metrics.experimental.confusion_matrix import ( + BinaryConfusionMatrix, + MulticlassConfusionMatrix, + MultilabelConfusionMatrix, +) +from cyclops.evaluate.metrics.experimental.functional.confusion_matrix import ( + _binary_confusion_matrix_compute, + _multilabel_confusion_matrix_compute, +) +from cyclops.evaluate.metrics.experimental.functional.matthews_corr_coef import ( + _mcc_reduce, +) +from cyclops.evaluate.metrics.experimental.utils.types import Array + + +class BinaryMCC(BinaryConfusionMatrix, registry_key="binary_mcc"): + """A measure of the agreement between predicted and actual values. + + Parameters + ---------- + threshold : float, default=0.5 + The threshold value to use when binarizing the inputs. + ignore_index : int, optional, default=None + Specifies a target value that is ignored and does not contribute to the + metric. If `None`, all values are used. + **kwargs : Any + Additional keyword arguments common to all metrics. + + Examples + -------- + >>> import numpy.array_api as anp + >>> from cyclops.evaluate.metrics.experimental import BinaryMCC + >>> target = anp.asarray([0, 1, 0, 1, 0, 1]) + >>> preds = anp.asarray([0, 0, 1, 1, 0, 1]) + >>> metric = BinaryMCC() + >>> metric(target, preds) + Array(0.33333334, dtype=float32) + >>> target = anp.asarray([0, 1, 0, 1, 0, 1]) + >>> preds = anp.asarray([0.11, 0.22, 0.84, 0.73, 0.33, 0.92]) + >>> metric = BinaryMCC() + >>> metric(target, preds) + Array(0.33333334, dtype=float32) + + """ + + name: str = "Matthews Correlation Coefficient" + + def __init__( + self, + threshold: float = 0.5, + ignore_index: Optional[int] = None, + **kwargs: Any, + ) -> None: + """Initialize the class.""" + super().__init__(threshold, normalize=None, ignore_index=ignore_index, **kwargs) + + def _compute_metric(self) -> Array: + """Compute the confusion matrix.""" + tn, fp, fn, tp = self._final_state() + confmat = _binary_confusion_matrix_compute( + tp=tp, + fp=fp, + tn=tn, + fn=fn, + normalize=self.normalize, + ) + return _mcc_reduce(confmat) + + +class MulticlassMCC(MulticlassConfusionMatrix, registry_key="multiclass_mcc"): + """A measure of the agreement between predicted and actual values. + + Parameters + ---------- + num_classes : int + The number of classes. + ignore_index : int, optional, default=None + Specifies a target value that is ignored and does not contribute to the + metric. If `None`, all values are used. + **kwargs : Any + Additional keyword arguments common to all metrics. + + Examples + -------- + >>> import numpy.array_api as anp + >>> from cyclops.evaluate.metrics.experimental import MulticlassMCC + >>> target = anp.asarray([2, 1, 0, 0]) + >>> preds = anp.asarray([2, 1, 0, 1]) + >>> metric = MulticlassMCC(num_classes=3) + >>> metric(target, preds) + Array(0.7, dtype=float32) + >>> target = anp.asarray([2, 1, 0, 0]) + >>> preds = anp.asarray([[0.16, 0.26, 0.58], + ... [0.22, 0.61, 0.17], + ... [0.71, 0.09, 0.20], + ... [0.05, 0.82, 0.13]]) + >>> metric = MulticlassMCC(num_classes=3) + >>> metric(target, preds) + Array(0.7, dtype=float32) + """ + + name: str = "Matthews Correlation Coefficient" + + def __init__( + self, + num_classes: int, + ignore_index: Optional[Union[int, Tuple[int]]] = None, + **kwargs: Any, + ) -> None: + """Initialize the class.""" + super().__init__( + num_classes=num_classes, + normalize=None, + ignore_index=ignore_index, + **kwargs, + ) + + def _compute_metric(self) -> Array: + """Compute the confusion matrix.""" + return _mcc_reduce(self.confmat) # type: ignore + + +class MultilabelMCC(MultilabelConfusionMatrix, registry_key="multilabel_mcc"): + """A measure of the agreement between predicted and actual values. + + Parameters + ---------- + num_labels : int + The number of labels. + threshold : float, default=0.5 + The threshold value to use when binarizing the inputs. + ignore_index : int, optional, default=None + Specifies a target value that is ignored and does not contribute to the + metric. If `None`, all values are used. + **kwargs : Any + Additional keyword arguments common to all metrics. + + Examples + -------- + >>> import numpy.array_api as anp + >>> from cyclops.evaluate.metrics.experimental import MultilabelMCC + >>> target = anp.asarray([[0, 1, 0], [1, 0, 1]]) + >>> preds = anp.asarray([[0, 0, 1], [1, 0, 1]]) + >>> metric = MultilabelMCC(num_labels=3) + >>> metric(target, preds) + Array(0.33333334, dtype=float32) + >>> target = anp.asarray([[0, 1, 0], [1, 0, 1]]) + >>> preds = anp.asarray([[0.11, 0.22, 0.84], [0.73, 0.33, 0.92]]) + >>> metric = MultilabelMCC(num_labels=3) + >>> metric(target, preds) + Array(0.33333334, dtype=float32) + + """ + + name: str = "Matthews Correlation Coefficient" + + def __init__( + self, + num_labels: int, + threshold: float = 0.5, + ignore_index: Optional[int] = None, + **kwargs: Any, + ) -> None: + """Initialize the class.""" + super().__init__( + num_labels=num_labels, + threshold=threshold, + normalize=None, + ignore_index=ignore_index, + **kwargs, + ) + + def _compute_metric(self) -> Array: + """Compute the confusion matrix.""" + tn, fp, fn, tp = self._final_state() + confmat = _multilabel_confusion_matrix_compute( + tp=tp, + fp=fp, + tn=tn, + fn=fn, + num_labels=self.num_labels, + normalize=self.normalize, + ) + return _mcc_reduce(confmat) diff --git a/tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py b/tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py new file mode 100644 index 000000000..92c8ca390 --- /dev/null +++ b/tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py @@ -0,0 +1,378 @@ +"""Test matthews correlation coefficient metrics.""" +from functools import partial + +import array_api_compat as apc +import array_api_compat.torch +import numpy.array_api as anp +import pytest +import torch.utils.dlpack +from torchmetrics.functional.classification import ( + binary_matthews_corrcoef, + multiclass_matthews_corrcoef, + multilabel_matthews_corrcoef, +) + +from cyclops.evaluate.metrics.experimental.functional.matthews_corr_coef import ( + binary_mcc, + multiclass_mcc, + multilabel_mcc, +) +from cyclops.evaluate.metrics.experimental.matthews_corr_coef import ( + BinaryMCC, + MulticlassMCC, + MultilabelMCC, +) +from cyclops.evaluate.metrics.experimental.utils.ops import to_int +from cyclops.evaluate.metrics.experimental.utils.validation import is_floating_point + +from ..conftest import NUM_CLASSES, NUM_LABELS, THRESHOLD +from .inputs import _binary_cases, _multiclass_cases, _multilabel_cases +from .testers import MetricTester, _inject_ignore_index + + +def _binary_mcc_reference( + target, + preds, + threshold, + ignore_index, +) -> torch.Tensor: + """Return the reference binary matthews correlation coefficient.""" + return binary_matthews_corrcoef( + torch.utils.dlpack.from_dlpack(preds), + torch.utils.dlpack.from_dlpack(target), + threshold=threshold, + ignore_index=ignore_index, + ) + + +class TestBinaryMCC(MetricTester): + """Test binary matthews correlation coefficient function and class.""" + + @pytest.mark.parametrize("inputs", _binary_cases(xp=anp)) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_binary_mcc_function_with_numpy_array_api_arrays( + self, + inputs, + ignore_index, + ) -> None: + """Test function for binary matthews corrcoef using numpy.array_api arrays.""" + target, preds = inputs + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + self.run_metric_function_implementation_test( + target, + preds, + metric_function=binary_mcc, + metric_args={ + "threshold": THRESHOLD, + "ignore_index": ignore_index, + }, + reference_metric=partial( + _binary_mcc_reference, + threshold=THRESHOLD, + ignore_index=ignore_index, + ), + ) + + @pytest.mark.parametrize("inputs", _binary_cases(xp=anp)) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_binary_mcc_class_with_numpy_array_api_arrays( + self, + inputs, + ignore_index, + ) -> None: + """Test class for binary matthews correlation coefficient.""" + target, preds = inputs + + if ( + preds.ndim == 1 + and is_floating_point(preds) + and not anp.all(to_int((preds >= 0)) * to_int((preds <= 1))) + ): + pytest.skip( + "When using 0-D logits, batch result will be different from local " + "result because the `sigmoid` operation may not be applied to each " + "batch (some values may be in [0, 1] and some may not).", + ) + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + self.run_metric_class_implementation_test( + target, + preds, + metric_class=BinaryMCC, + metric_args={ + "threshold": THRESHOLD, + "ignore_index": ignore_index, + }, + reference_metric=partial( + _binary_mcc_reference, + threshold=THRESHOLD, + ignore_index=ignore_index, + ), + ) + + @pytest.mark.integration_test() # machine for integration tests has GPU + @pytest.mark.parametrize("inputs", _binary_cases(xp=array_api_compat.torch)) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_binary_mcc_class_with_torch_tensors( + self, + inputs, + ignore_index, + ) -> None: + """Test binary matthews correlation coefficient class with torch tensors.""" + target, preds = inputs + + if ( + preds.ndim == 1 + and is_floating_point(preds) + and not torch.all(to_int((preds >= 0)) * to_int((preds <= 1))) + ): + pytest.skip( + "When using 0-D logits, batch result will be different from local " + "result because the `sigmoid` operation may not be applied to each " + "batch (some values may be in [0, 1] and some may not).", + ) + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + device = "cuda" if torch.cuda.is_available() else "cpu" + + self.run_metric_class_implementation_test( + target, + preds, + metric_class=BinaryMCC, + metric_args={ + "threshold": THRESHOLD, + "ignore_index": ignore_index, + }, + reference_metric=partial( + _binary_mcc_reference, + threshold=THRESHOLD, + ignore_index=ignore_index, + ), + device=device, + use_device_for_ref=True, + ) + + +def _multiclass_mcc_reference( + target, + preds, + num_classes=NUM_CLASSES, + ignore_index=None, +) -> torch.Tensor: + """Return the reference multiclass matthews correlation coefficient.""" + if preds.ndim == 1 and is_floating_point(preds): + xp = apc.array_namespace(preds) + preds = xp.argmax(preds, axis=0) + + return multiclass_matthews_corrcoef( + torch.utils.dlpack.from_dlpack(preds), + torch.utils.dlpack.from_dlpack(target), + num_classes, + ignore_index=ignore_index, + ) + + +class TestMulticlassMCC(MetricTester): + """Test multiclass matthews correlation coefficient function and class.""" + + @pytest.mark.parametrize("inputs", _multiclass_cases(xp=anp)) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_multiclass_mcc_with_numpy_array_api_arrays( + self, + inputs, + ignore_index, + ) -> None: + """Test function for multiclass matthews correlation coefficient.""" + target, preds = inputs + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + self.run_metric_function_implementation_test( + target, + preds, + metric_function=multiclass_mcc, + metric_args={ + "num_classes": NUM_CLASSES, + "ignore_index": ignore_index, + }, + reference_metric=partial( + _multiclass_mcc_reference, + ignore_index=ignore_index, + ), + ) + + @pytest.mark.parametrize("inputs", _multiclass_cases(xp=anp)) + @pytest.mark.parametrize("ignore_index", [None, 1, -1]) + def test_multiclass_mcc_class_with_numpy_array_api_arrays( + self, + inputs, + ignore_index, + ) -> None: + """Test class for multiclass matthews correlation coefficient.""" + target, preds = inputs + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + self.run_metric_class_implementation_test( + target, + preds, + metric_class=MulticlassMCC, + reference_metric=partial( + _multiclass_mcc_reference, + ignore_index=ignore_index, + ), + metric_args={ + "num_classes": NUM_CLASSES, + "ignore_index": ignore_index, + }, + ) + + @pytest.mark.integration_test() # machine for integration tests has GPU + @pytest.mark.parametrize("inputs", _multiclass_cases(xp=array_api_compat.torch)) + @pytest.mark.parametrize("ignore_index", [None, 1, -1]) + def test_multiclass_mcc_class_with_torch_tensors( + self, + inputs, + ignore_index, + ) -> None: + """Test class for multiclass matthews correlation coefficient.""" + target, preds = inputs + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + device = "cuda" if torch.cuda.is_available() else "cpu" + + self.run_metric_class_implementation_test( + target, + preds, + metric_class=MulticlassMCC, + reference_metric=partial( + _multiclass_mcc_reference, + ignore_index=ignore_index, + ), + metric_args={ + "num_classes": NUM_CLASSES, + "ignore_index": ignore_index, + }, + device=device, + use_device_for_ref=True, + ) + + +def _multilabel_mcc_reference( + preds, + target, + threshold, + num_labels=NUM_LABELS, + ignore_index=None, +) -> torch.Tensor: + """Return the reference multilabel matthews correlation coefficient.""" + return multilabel_matthews_corrcoef( + torch.utils.dlpack.from_dlpack(preds), + torch.utils.dlpack.from_dlpack(target), + num_labels, + threshold=threshold, + ignore_index=ignore_index, + ) + + +class TestMultilabelMCC(MetricTester): + """Test multilabel matthews correlation coefficient function and class.""" + + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_multilabel_mcc_with_numpy_array_api_arrays( + self, + inputs, + ignore_index, + ) -> None: + """Test function for multilabel matthews correlation coefficient.""" + target, preds = inputs + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + self.run_metric_function_implementation_test( + target, + preds, + metric_function=multilabel_mcc, + reference_metric=partial( + _multilabel_mcc_reference, + threshold=THRESHOLD, + ignore_index=ignore_index, + ), + metric_args={ + "threshold": THRESHOLD, + "num_labels": NUM_LABELS, + "ignore_index": ignore_index, + }, + ) + + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_multilabel_mcc_class_with_numpy_array_api_arrays( + self, + inputs, + ignore_index, + ) -> None: + """Test class for multilabel matthews correlation coefficient.""" + target, preds = inputs + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + self.run_metric_class_implementation_test( + target, + preds, + metric_class=MultilabelMCC, + reference_metric=partial( + _multilabel_mcc_reference, + threshold=THRESHOLD, + ignore_index=ignore_index, + ), + metric_args={ + "threshold": THRESHOLD, + "num_labels": NUM_LABELS, + "ignore_index": ignore_index, + }, + ) + + @pytest.mark.integration_test() # machine for integration tests has GPU + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_multilabel_mcc_class_with_torch_tensors( + self, + inputs, + ignore_index, + ) -> None: + """Test class for multilabel matthews correlation coefficient.""" + target, preds = inputs + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + self.run_metric_class_implementation_test( + target, + preds, + metric_class=MultilabelMCC, + reference_metric=partial( + _multilabel_mcc_reference, + threshold=THRESHOLD, + ignore_index=ignore_index, + ), + metric_args={ + "threshold": THRESHOLD, + "num_labels": NUM_LABELS, + "ignore_index": ignore_index, + }, + ) From f6987e134c62dddab79dc998313ebad8d28a5474 Mon Sep 17 00:00:00 2001 From: Franklin <41602287+fcogidi@users.noreply.github.com> Date: Fri, 2 Feb 2024 12:13:35 -0500 Subject: [PATCH 02/14] Reorder arguments in _multilabel_mcc_reference function --- .../evaluate/metrics/experimental/test_matthews_corr_coef.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py b/tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py index 92c8ca390..831bababf 100644 --- a/tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py +++ b/tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py @@ -270,8 +270,8 @@ def test_multiclass_mcc_class_with_torch_tensors( def _multilabel_mcc_reference( - preds, target, + preds, threshold, num_labels=NUM_LABELS, ignore_index=None, From 5614997efc8f3c976d8e26f24389c759a3ef9da2 Mon Sep 17 00:00:00 2001 From: Franklin <41602287+fcogidi@users.noreply.github.com> Date: Wed, 7 Feb 2024 13:54:56 -0500 Subject: [PATCH 03/14] update linux version for code check --- .github/workflows/code_checks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/code_checks.yml b/.github/workflows/code_checks.yml index e26b716ff..8414a5b1f 100644 --- a/.github/workflows/code_checks.yml +++ b/.github/workflows/code_checks.yml @@ -24,7 +24,7 @@ on: jobs: run-code-check: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 - name: Install poetry From 81c7e7e7eab461ebb657dc97f76d6c2bbb7389b1 Mon Sep 17 00:00:00 2001 From: Franklin <41602287+fcogidi@users.noreply.github.com> Date: Wed, 7 Feb 2024 14:12:54 -0500 Subject: [PATCH 04/14] revert ubuntu version change & update action versions --- .github/workflows/code_checks.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/code_checks.yml b/.github/workflows/code_checks.yml index 8414a5b1f..242c2ae30 100644 --- a/.github/workflows/code_checks.yml +++ b/.github/workflows/code_checks.yml @@ -24,12 +24,12 @@ on: jobs: run-code-check: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install poetry run: python3 -m pip install --upgrade pip && python3 -m pip install poetry - - uses: actions/setup-python@v4.7.1 + - uses: actions/setup-python@v5.0.0 with: python-version: '3.10' cache: 'poetry' From be63d4a046becfb8e036a51bba70521863527758 Mon Sep 17 00:00:00 2001 From: Franklin <41602287+fcogidi@users.noreply.github.com> Date: Wed, 7 Feb 2024 14:33:15 -0500 Subject: [PATCH 05/14] use torch instead of dlpack to convert tensors to numpy --- .../evaluate/metrics/experimental/testers.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/cyclops/evaluate/metrics/experimental/testers.py b/tests/cyclops/evaluate/metrics/experimental/testers.py index 4ae8775dc..c5bc95291 100644 --- a/tests/cyclops/evaluate/metrics/experimental/testers.py +++ b/tests/cyclops/evaluate/metrics/experimental/testers.py @@ -1,9 +1,11 @@ """Testers for metrics.""" + from functools import partial from typing import Any, Callable, Dict, Optional, Sequence, Type import array_api_compat as apc import numpy as np +from array_api_compat.common._helpers import _is_torch_array from cyclops.evaluate.metrics.experimental.metric import Metric from cyclops.evaluate.metrics.experimental.utils.ops import clone, flatten @@ -19,8 +21,15 @@ def _assert_allclose( """Recursively assert that two results are within a certain tolerance.""" if apc.is_array_api_obj(cyclops_result) and apc.is_array_api_obj(ref_result): # move to cpu and convert to numpy - cyclops_result = np.from_dlpack(apc.to_device(cyclops_result, "cpu")) - ref_result = np.from_dlpack(apc.to_device(ref_result, "cpu")) + if _is_torch_array(cyclops_result): + cyclops_result = cyclops_result.cpu().numpy() + else: + cyclops_result = np.from_dlpack(apc.to_device(cyclops_result, "cpu")) + + if _is_torch_array(ref_result): + ref_result = ref_result.cpu().numpy() + else: + ref_result = np.from_dlpack(apc.to_device(ref_result, "cpu")) np.testing.assert_allclose( cyclops_result, From b6651f46a6b6ff71944142c66b8d76f8413e375a Mon Sep 17 00:00:00 2001 From: Franklin <41602287+fcogidi@users.noreply.github.com> Date: Wed, 7 Feb 2024 15:08:00 -0500 Subject: [PATCH 06/14] update numpy conversion methods --- .../evaluate/metrics/experimental/testers.py | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tests/cyclops/evaluate/metrics/experimental/testers.py b/tests/cyclops/evaluate/metrics/experimental/testers.py index c5bc95291..4d58305d3 100644 --- a/tests/cyclops/evaluate/metrics/experimental/testers.py +++ b/tests/cyclops/evaluate/metrics/experimental/testers.py @@ -5,7 +5,6 @@ import array_api_compat as apc import numpy as np -from array_api_compat.common._helpers import _is_torch_array from cyclops.evaluate.metrics.experimental.metric import Metric from cyclops.evaluate.metrics.experimental.utils.ops import clone, flatten @@ -21,15 +20,20 @@ def _assert_allclose( """Recursively assert that two results are within a certain tolerance.""" if apc.is_array_api_obj(cyclops_result) and apc.is_array_api_obj(ref_result): # move to cpu and convert to numpy - if _is_torch_array(cyclops_result): - cyclops_result = cyclops_result.cpu().numpy() - else: - cyclops_result = np.from_dlpack(apc.to_device(cyclops_result, "cpu")) - - if _is_torch_array(ref_result): - ref_result = ref_result.cpu().numpy() - else: - ref_result = np.from_dlpack(apc.to_device(ref_result, "cpu")) + cyclops_result = np.from_dlpack( + ( + apc.to_device(cyclops_result, "cpu") + if apc.device(cyclops_result) != "cpu" + else cyclops_result + ), + ) + ref_result = np.from_dlpack( + ( + apc.to_device(ref_result, "cpu") + if apc.device(ref_result) != "cpu" + else ref_result + ), + ) np.testing.assert_allclose( cyclops_result, From 2834deb03815d425eb21c18499ecb830107d55fa Mon Sep 17 00:00:00 2001 From: Franklin <41602287+fcogidi@users.noreply.github.com> Date: Wed, 7 Feb 2024 16:22:18 -0500 Subject: [PATCH 07/14] set dtype for multilabel test inputs --- .../evaluate/metrics/experimental/inputs.py | 28 +++++++++++-------- .../test_precision_recall_curve.py | 27 ++++++++++++------ .../evaluate/metrics/experimental/test_roc.py | 27 ++++++++++++------ 3 files changed, 52 insertions(+), 30 deletions(-) diff --git a/tests/cyclops/evaluate/metrics/experimental/inputs.py b/tests/cyclops/evaluate/metrics/experimental/inputs.py index 92af7b9e6..d38d1d852 100644 --- a/tests/cyclops/evaluate/metrics/experimental/inputs.py +++ b/tests/cyclops/evaluate/metrics/experimental/inputs.py @@ -1,4 +1,5 @@ """Input data for tests of metrics in cyclops/evaluate/metrics/experimental.""" + import random from collections import namedtuple from types import ModuleType @@ -296,43 +297,46 @@ def _multilabel_cases(*, xp: Any): return ( pytest.param( InputSpec( - target=xp.asarray(_multilabel_labels), - preds=xp.asarray(_multilabel_preds), + target=xp.asarray(_multilabel_labels, dtype=xp.int32), + preds=xp.asarray(_multilabel_preds, dtype=xp.int32), ), id="input[2d-labels]", ), pytest.param( InputSpec( - target=xp.asarray(_multilabel_labels_multidim), - preds=xp.asarray(_multilabel_preds_multidim), + target=xp.asarray(_multilabel_labels_multidim, dtype=xp.int32), + preds=xp.asarray(_multilabel_preds_multidim, dtype=xp.int32), ), id="input[multidim-labels]", ), pytest.param( InputSpec( - target=xp.asarray(_multilabel_labels), - preds=xp.asarray(_multilabel_probs), + target=xp.asarray(_multilabel_labels, dtype=xp.int32), + preds=xp.asarray(_multilabel_probs, dtype=xp.float32), ), id="input[2d-probs]", ), pytest.param( InputSpec( - target=xp.asarray(_multilabel_labels), - preds=xp.asarray(_inv_sigmoid(_multilabel_probs)), + target=xp.asarray(_multilabel_labels, dtype=xp.int32), + preds=xp.asarray(_inv_sigmoid(_multilabel_probs), dtype=xp.float32), ), id="input[2d-logits]", ), pytest.param( InputSpec( - target=xp.asarray(_multilabel_labels_multidim), - preds=xp.asarray(_multilabel_probs_multidim), + target=xp.asarray(_multilabel_labels_multidim, dtype=xp.int32), + preds=xp.asarray(_multilabel_probs_multidim, dtype=xp.float32), ), id="input[multidim-probs]", ), pytest.param( InputSpec( - target=xp.asarray(_multilabel_labels_multidim), - preds=xp.asarray(_inv_sigmoid(_multilabel_probs_multidim)), + target=xp.asarray(_multilabel_labels_multidim, dtype=xp.int32), + preds=xp.asarray( + _inv_sigmoid(_multilabel_probs_multidim), + dtype=xp.float32, + ), ), id="input[multidim-logits]", ), diff --git a/tests/cyclops/evaluate/metrics/experimental/test_precision_recall_curve.py b/tests/cyclops/evaluate/metrics/experimental/test_precision_recall_curve.py index 4dc5989fd..081ebd1e9 100644 --- a/tests/cyclops/evaluate/metrics/experimental/test_precision_recall_curve.py +++ b/tests/cyclops/evaluate/metrics/experimental/test_precision_recall_curve.py @@ -1,4 +1,5 @@ """Test precision-recall curve metric.""" + from functools import partial from typing import List, Tuple, Union @@ -45,9 +46,11 @@ def _binary_precision_recall_curve_reference( return tm_binary_precision_recall_curve( torch.utils.dlpack.from_dlpack(preds), torch.utils.dlpack.from_dlpack(target), - thresholds=torch.utils.dlpack.from_dlpack(thresholds) - if apc.is_array_api_obj(thresholds) - else thresholds, + thresholds=( + torch.utils.dlpack.from_dlpack(thresholds) + if apc.is_array_api_obj(thresholds) + else thresholds + ), ignore_index=ignore_index, ) @@ -215,9 +218,11 @@ def _multiclass_precision_recall_curve_reference( torch.utils.dlpack.from_dlpack(preds), torch.utils.dlpack.from_dlpack(target), num_classes, - thresholds=torch.utils.dlpack.from_dlpack(thresholds) - if apc.is_array_api_obj(thresholds) - else thresholds, + thresholds=( + torch.utils.dlpack.from_dlpack(thresholds) + if apc.is_array_api_obj(thresholds) + else thresholds + ), ignore_index=ignore_index, ) @@ -371,9 +376,11 @@ def _multilabel_precision_recall_curve_reference( torch.utils.dlpack.from_dlpack(preds), torch.utils.dlpack.from_dlpack(target), num_labels, - thresholds=torch.utils.dlpack.from_dlpack(thresholds) - if apc.is_array_api_obj(thresholds) - else thresholds, + thresholds=( + torch.utils.dlpack.from_dlpack(thresholds) + if apc.is_array_api_obj(thresholds) + else thresholds + ), ignore_index=ignore_index, ) @@ -381,6 +388,8 @@ def _multilabel_precision_recall_curve_reference( class TestMultilabelPrecisionRecallCurve(MetricTester): """Test multilabel precision-recall curve function and class.""" + atol: float = 2e-7 + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)[2:]) @pytest.mark.parametrize("thresholds", _thresholds(xp=anp)) @pytest.mark.parametrize("ignore_index", [None, 0, -1]) diff --git a/tests/cyclops/evaluate/metrics/experimental/test_roc.py b/tests/cyclops/evaluate/metrics/experimental/test_roc.py index ddc4f9556..17a4fff5a 100644 --- a/tests/cyclops/evaluate/metrics/experimental/test_roc.py +++ b/tests/cyclops/evaluate/metrics/experimental/test_roc.py @@ -1,4 +1,5 @@ """Test roc curve metric.""" + from functools import partial from typing import List, Tuple, Union @@ -45,9 +46,11 @@ def _binary_roc_reference( return tm_binary_roc( torch.utils.dlpack.from_dlpack(preds), torch.utils.dlpack.from_dlpack(target), - thresholds=torch.utils.dlpack.from_dlpack(thresholds) - if apc.is_array_api_obj(thresholds) - else thresholds, + thresholds=( + torch.utils.dlpack.from_dlpack(thresholds) + if apc.is_array_api_obj(thresholds) + else thresholds + ), ignore_index=ignore_index, ) @@ -215,9 +218,11 @@ def _multiclass_roc_reference( torch.utils.dlpack.from_dlpack(preds), torch.utils.dlpack.from_dlpack(target), num_classes, - thresholds=torch.utils.dlpack.from_dlpack(thresholds) - if apc.is_array_api_obj(thresholds) - else thresholds, + thresholds=( + torch.utils.dlpack.from_dlpack(thresholds) + if apc.is_array_api_obj(thresholds) + else thresholds + ), ignore_index=ignore_index, ) @@ -371,9 +376,11 @@ def _multilabel_roc_reference( torch.utils.dlpack.from_dlpack(preds), torch.utils.dlpack.from_dlpack(target), num_labels, - thresholds=torch.utils.dlpack.from_dlpack(thresholds) - if apc.is_array_api_obj(thresholds) - else thresholds, + thresholds=( + torch.utils.dlpack.from_dlpack(thresholds) + if apc.is_array_api_obj(thresholds) + else thresholds + ), ignore_index=ignore_index, ) @@ -381,6 +388,8 @@ def _multilabel_roc_reference( class TestMultilabelROC(MetricTester): """Test multilabel roc curve function and class.""" + atol: float = 9e-8 + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)[2:]) @pytest.mark.parametrize("thresholds", _thresholds(xp=anp)) @pytest.mark.parametrize("ignore_index", [None, 0, -1]) From 8c380c8105a1d0c82d8c679c56091ed097b9c9a9 Mon Sep 17 00:00:00 2001 From: Franklin <41602287+fcogidi@users.noreply.github.com> Date: Mon, 12 Feb 2024 11:14:42 -0500 Subject: [PATCH 08/14] fix doctest error --- cyclops/utils/index.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/cyclops/utils/index.py b/cyclops/utils/index.py index 2e3941a02..3b281ec7a 100644 --- a/cyclops/utils/index.py +++ b/cyclops/utils/index.py @@ -3,6 +3,7 @@ from typing import Any, List, Optional, Sequence, Tuple, Union import numpy as np +import numpy.typing as npt def index_axis(ind: int, axis: int, shape: Tuple[int, ...]) -> Tuple[Any, ...]: @@ -33,9 +34,9 @@ def index_axis(ind: int, axis: int, shape: Tuple[int, ...]) -> Tuple[Any, ...]: def take_indices( - data: np.typing.NDArray[Any], - indexes: Sequence[Optional[Union[Sequence[int], np.typing.NDArray[Any]]]], -) -> np.typing.NDArray[Any]: + data: npt.NDArray[Any], + indexes: Sequence[Optional[Union[Sequence[int], npt.NDArray[Any]]]], +) -> npt.NDArray[Any]: """Index array by specifying the indices to take on each axis. Parameters @@ -69,10 +70,10 @@ def take_indices( def take_indices_over_axis( - data: np.typing.NDArray[Any], + data: npt.NDArray[Any], axis: int, - index: Union[np.typing.NDArray[Any], Sequence[int]], -) -> np.typing.NDArray[Any]: + index: Union[npt.NDArray[Any], Sequence[int]], +) -> npt.NDArray[Any]: """Take indices along an axis. Parameters From dfa89cf7bed9ee1c02b3b98cc288f6e586b281df Mon Sep 17 00:00:00 2001 From: Franklin <41602287+fcogidi@users.noreply.github.com> Date: Fri, 16 Feb 2024 10:35:44 -0500 Subject: [PATCH 09/14] update implementation of multilabel confusion matrix --- .../metrics/experimental/confusion_matrix.py | 31 +++++----- .../functional/confusion_matrix.py | 56 +++++++++---------- .../functional/matthews_corr_coef.py | 25 ++++----- .../experimental/matthews_corr_coef.py | 13 +---- 4 files changed, 59 insertions(+), 66 deletions(-) diff --git a/cyclops/evaluate/metrics/experimental/confusion_matrix.py b/cyclops/evaluate/metrics/experimental/confusion_matrix.py index 744ea1720..b1e19d27f 100644 --- a/cyclops/evaluate/metrics/experimental/confusion_matrix.py +++ b/cyclops/evaluate/metrics/experimental/confusion_matrix.py @@ -1,4 +1,5 @@ """Confusion matrix.""" + from types import ModuleType from typing import Any, Optional, Tuple, Union @@ -276,10 +277,7 @@ def _compute_metric(self) -> Array: ) -class MultilabelConfusionMatrix( - _AbstractConfusionMatrix, - registry_key="multilabel_confusion_matrix", -): +class MultilabelConfusionMatrix(Metric, registry_key="multilabel_confusion_matrix"): """Confusion matrix for multilabel classification tasks. Parameters @@ -329,6 +327,8 @@ class MultilabelConfusionMatrix( """ + name: str = "Confusion Matrix" + def __init__( self, num_labels: int, @@ -352,7 +352,11 @@ def __init__( self.normalize = normalize self.ignore_index = ignore_index - self._create_state(size=num_labels) + self.add_state_default_factory( + "confmat", + lambda xp: xp.zeros((num_labels, 2, 2), dtype=xp.int64, device=self.device), # type: ignore + dist_reduce_fn="sum", + ) def _update_state(self, target: Array, preds: Array) -> None: """Update the state variables.""" @@ -365,21 +369,22 @@ def _update_state(self, target: Array, preds: Array) -> None: target, preds = _multilabel_confusion_matrix_format_arrays( target, preds, + self.num_labels, threshold=self.threshold, ignore_index=self.ignore_index, xp=xp, ) - tn, fp, fn, tp = _multilabel_confusion_matrix_update_state(target, preds, xp=xp) - self._update_stat_scores(tn=tn, fp=fp, fn=fn, tp=tp) + confmat = _multilabel_confusion_matrix_update_state( + target, + preds, + self.num_labels, + xp=xp, + ) + self.confmat += confmat # type: ignore def _compute_metric(self) -> Array: """Compute the confusion matrix.""" - tn, fp, fn, tp = self._final_state() return _multilabel_confusion_matrix_compute( - tp=tp, - fp=fp, - tn=tn, - fn=fn, - num_labels=self.num_labels, + self.confmat, # type: ignore normalize=self.normalize, ) diff --git a/cyclops/evaluate/metrics/experimental/functional/confusion_matrix.py b/cyclops/evaluate/metrics/experimental/functional/confusion_matrix.py index cfc462269..fa2766f5a 100644 --- a/cyclops/evaluate/metrics/experimental/functional/confusion_matrix.py +++ b/cyclops/evaluate/metrics/experimental/functional/confusion_matrix.py @@ -1,4 +1,5 @@ """Functions for computing the confusion matrix for classification tasks.""" + # mypy: disable-error-code="no-any-return" from types import ModuleType from typing import Literal, Optional, Tuple, Union @@ -9,6 +10,7 @@ bincount, clone, flatten, + moveaxis, remove_ignore_index, safe_divide, sigmoid, @@ -599,6 +601,7 @@ def _multilabel_confusion_matrix_validate_arrays( def _multilabel_confusion_matrix_format_arrays( target: Array, preds: Array, + num_labels: int, threshold: float = 0.5, ignore_index: Optional[int] = None, *, @@ -613,13 +616,15 @@ def _multilabel_confusion_matrix_format_arrays( preds = sigmoid(preds) # convert logits to probabilities preds = to_int(preds > threshold) - preds = xp.reshape(preds, shape=(*preds.shape[:2], -1)) - target = xp.reshape(target, shape=(*target.shape[:2], -1)) + preds = xp.reshape(moveaxis(preds, 1, -1), shape=(-1, num_labels)) + target = xp.reshape(moveaxis(target, 1, -1), shape=(-1, num_labels)) if ignore_index is not None: - idx = target == ignore_index target = clone(target) - target[idx] = -1 + preds = clone(preds) + idx = target == ignore_index + target[idx] = -4 * num_labels + preds[idx] = -4 * num_labels return target, preds @@ -627,34 +632,25 @@ def _multilabel_confusion_matrix_format_arrays( def _multilabel_confusion_matrix_update_state( target: Array, preds: Array, + num_labels: int, *, xp: ModuleType, -) -> Tuple[Array, Array, Array, Array]: +) -> Array: """Compute the statistics for the given `target` and `preds` arrays.""" - sum_axis = (0, -1) - tp = squeeze_all(xp.sum(to_int((target == preds) & (target == 1)), axis=sum_axis)) - fn = squeeze_all(xp.sum(to_int((target != preds) & (target == 1)), axis=sum_axis)) - fp = squeeze_all(xp.sum(to_int((target != preds) & (target == 0)), axis=sum_axis)) - tn = squeeze_all(xp.sum(to_int((target == preds) & (target == 0)), axis=sum_axis)) - - return tn, fp, fn, tp + unique_mapping = (2 * target + preds) + 4 * flatten( + xp.arange(num_labels, device=apc.device(preds)), + ) + unique_mapping = unique_mapping[unique_mapping >= 0] + bins = bincount(unique_mapping, minlength=4 * num_labels) + return xp.reshape(bins, shape=(num_labels, 2, 2)) def _multilabel_confusion_matrix_compute( - tn: Array, - fp: Array, - fn: Array, - tp: Array, - num_labels: int, + confmat: Array, normalize: Optional[str] = None, ) -> Array: """Compute the confusion matrix from the given stat scores.""" - xp = apc.array_namespace(tn, fp, fn, tp) - - confmat = squeeze_all( - xp.reshape(xp.stack([tn, fp, fn, tp], axis=-1), shape=(-1, num_labels, 2, 2)), - ) - + xp = apc.array_namespace(confmat) return _normalize_confusion_matrix(confmat, normalize=normalize, xp=xp) @@ -768,17 +764,19 @@ class over the number of true samples for each class. target, preds = _multilabel_confusion_matrix_format_arrays( target, preds, + num_labels, threshold=threshold, ignore_index=ignore_index, xp=xp, ) - tn, fp, fn, tp = _multilabel_confusion_matrix_update_state(target, preds, xp=xp) + confmat = _multilabel_confusion_matrix_update_state( + target, + preds, + num_labels, + xp=xp, + ) return _multilabel_confusion_matrix_compute( - tn, - fp, - fn, - tp, - num_labels, + confmat, normalize=normalize, ) diff --git a/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py b/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py index 5bb34375f..4bb9cb927 100644 --- a/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py +++ b/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py @@ -1,4 +1,5 @@ """Functional API for the matthews correlation coefficient (MCC) metric.""" + from typing import Optional, Tuple, Union import array_api_compat as apc @@ -13,7 +14,6 @@ _multiclass_confusion_matrix_update_state, _multiclass_confusion_matrix_validate_args, _multiclass_confusion_matrix_validate_arrays, - _multilabel_confusion_matrix_compute, _multilabel_confusion_matrix_format_arrays, _multilabel_confusion_matrix_update_state, _multilabel_confusion_matrix_validate_args, @@ -25,6 +25,7 @@ def _mcc_reduce(confmat: Array) -> Array: """Reduce an un-normalized confusion matrix into the matthews corrcoef.""" xp = apc.array_namespace(confmat) + # convert multilabel into binary confmat = xp.sum(confmat, axis=0) if confmat.ndim == 3 else confmat @@ -36,10 +37,10 @@ def _mcc_reduce(confmat: Array) -> Array: if tp + tn == 0 and fp + fn != 0: return xp.asarray(-1.0, dtype=xp.float32, device=apc.device(confmat)) # type: ignore[no-any-return] - tk = xp.sum(confmat, axis=-1, dtype=xp.float32) - pk = xp.sum(confmat, axis=-2, dtype=xp.float32) - c = xp.astype(xp.linalg.trace(confmat), xp.float32) - s = xp.sum(confmat, dtype=xp.float32) + tk = xp.sum(confmat, axis=-1, dtype=xp.float32) # tn + fp and tp + fn + pk = xp.sum(confmat, axis=-2, dtype=xp.float32) # tn + fn and tp + fp + c = xp.astype(xp.linalg.trace(confmat), xp.float32) # tn and tp + s = xp.sum(confmat, dtype=xp.float32) # tn + tp + fn + fp cov_ytyp = c * s - sum(tk * pk) cov_ypyp = s**2 - sum(pk * pk) @@ -333,18 +334,16 @@ def multilabel_mcc( target, preds = _multilabel_confusion_matrix_format_arrays( target, preds, + num_labels, threshold=threshold, ignore_index=ignore_index, xp=xp, ) - tn, fp, fn, tp = _multilabel_confusion_matrix_update_state(target, preds, xp=xp) - - confmat = _multilabel_confusion_matrix_compute( - tn, - fp, - fn, - tp, + confmat = _multilabel_confusion_matrix_update_state( + target, + preds, num_labels, - normalize=None, + xp=xp, ) + return _mcc_reduce(confmat) diff --git a/cyclops/evaluate/metrics/experimental/matthews_corr_coef.py b/cyclops/evaluate/metrics/experimental/matthews_corr_coef.py index 804f3a857..bbfc4856e 100644 --- a/cyclops/evaluate/metrics/experimental/matthews_corr_coef.py +++ b/cyclops/evaluate/metrics/experimental/matthews_corr_coef.py @@ -1,4 +1,5 @@ """Matthews Correlation Coefficient (MCC) metric.""" + from typing import Any, Optional, Tuple, Union from cyclops.evaluate.metrics.experimental.confusion_matrix import ( @@ -8,7 +9,6 @@ ) from cyclops.evaluate.metrics.experimental.functional.confusion_matrix import ( _binary_confusion_matrix_compute, - _multilabel_confusion_matrix_compute, ) from cyclops.evaluate.metrics.experimental.functional.matthews_corr_coef import ( _mcc_reduce, @@ -175,13 +175,4 @@ def __init__( def _compute_metric(self) -> Array: """Compute the confusion matrix.""" - tn, fp, fn, tp = self._final_state() - confmat = _multilabel_confusion_matrix_compute( - tp=tp, - fp=fp, - tn=tn, - fn=fn, - num_labels=self.num_labels, - normalize=self.normalize, - ) - return _mcc_reduce(confmat) + return _mcc_reduce(self.confmat) # type: ignore From 2612934d59705d3bd27f23652c6248db405173f7 Mon Sep 17 00:00:00 2001 From: Franklin <41602287+fcogidi@users.noreply.github.com> Date: Fri, 16 Feb 2024 12:10:13 -0500 Subject: [PATCH 10/14] revert implementation update & add print statements for debugging --- .../metrics/experimental/confusion_matrix.py | 30 +++++----- .../functional/confusion_matrix.py | 55 ++++++++++--------- .../functional/matthews_corr_coef.py | 30 +++++++--- .../experimental/matthews_corr_coef.py | 12 +++- 4 files changed, 76 insertions(+), 51 deletions(-) diff --git a/cyclops/evaluate/metrics/experimental/confusion_matrix.py b/cyclops/evaluate/metrics/experimental/confusion_matrix.py index b1e19d27f..d2623e5e4 100644 --- a/cyclops/evaluate/metrics/experimental/confusion_matrix.py +++ b/cyclops/evaluate/metrics/experimental/confusion_matrix.py @@ -277,7 +277,10 @@ def _compute_metric(self) -> Array: ) -class MultilabelConfusionMatrix(Metric, registry_key="multilabel_confusion_matrix"): +class MultilabelConfusionMatrix( + _AbstractConfusionMatrix, + registry_key="multilabel_confusion_matrix", +): """Confusion matrix for multilabel classification tasks. Parameters @@ -327,8 +330,6 @@ class MultilabelConfusionMatrix(Metric, registry_key="multilabel_confusion_matri """ - name: str = "Confusion Matrix" - def __init__( self, num_labels: int, @@ -352,11 +353,7 @@ def __init__( self.normalize = normalize self.ignore_index = ignore_index - self.add_state_default_factory( - "confmat", - lambda xp: xp.zeros((num_labels, 2, 2), dtype=xp.int64, device=self.device), # type: ignore - dist_reduce_fn="sum", - ) + self._create_state(size=num_labels) def _update_state(self, target: Array, preds: Array) -> None: """Update the state variables.""" @@ -369,22 +366,21 @@ def _update_state(self, target: Array, preds: Array) -> None: target, preds = _multilabel_confusion_matrix_format_arrays( target, preds, - self.num_labels, threshold=self.threshold, ignore_index=self.ignore_index, xp=xp, ) - confmat = _multilabel_confusion_matrix_update_state( - target, - preds, - self.num_labels, - xp=xp, - ) - self.confmat += confmat # type: ignore + tn, fp, fn, tp = _multilabel_confusion_matrix_update_state(target, preds, xp=xp) + self._update_stat_scores(tn=tn, fp=fp, fn=fn, tp=tp) def _compute_metric(self) -> Array: """Compute the confusion matrix.""" + tn, fp, fn, tp = self._final_state() return _multilabel_confusion_matrix_compute( - self.confmat, # type: ignore + tp=tp, + fp=fp, + tn=tn, + fn=fn, + num_labels=self.num_labels, normalize=self.normalize, ) diff --git a/cyclops/evaluate/metrics/experimental/functional/confusion_matrix.py b/cyclops/evaluate/metrics/experimental/functional/confusion_matrix.py index fa2766f5a..14d26d6a0 100644 --- a/cyclops/evaluate/metrics/experimental/functional/confusion_matrix.py +++ b/cyclops/evaluate/metrics/experimental/functional/confusion_matrix.py @@ -10,7 +10,6 @@ bincount, clone, flatten, - moveaxis, remove_ignore_index, safe_divide, sigmoid, @@ -601,7 +600,6 @@ def _multilabel_confusion_matrix_validate_arrays( def _multilabel_confusion_matrix_format_arrays( target: Array, preds: Array, - num_labels: int, threshold: float = 0.5, ignore_index: Optional[int] = None, *, @@ -616,15 +614,13 @@ def _multilabel_confusion_matrix_format_arrays( preds = sigmoid(preds) # convert logits to probabilities preds = to_int(preds > threshold) - preds = xp.reshape(moveaxis(preds, 1, -1), shape=(-1, num_labels)) - target = xp.reshape(moveaxis(target, 1, -1), shape=(-1, num_labels)) + preds = xp.reshape(preds, shape=(*preds.shape[:2], -1)) + target = xp.reshape(target, shape=(*target.shape[:2], -1)) if ignore_index is not None: - target = clone(target) - preds = clone(preds) idx = target == ignore_index - target[idx] = -4 * num_labels - preds[idx] = -4 * num_labels + target = clone(target) + target[idx] = -1 return target, preds @@ -632,25 +628,34 @@ def _multilabel_confusion_matrix_format_arrays( def _multilabel_confusion_matrix_update_state( target: Array, preds: Array, - num_labels: int, *, xp: ModuleType, -) -> Array: +) -> Tuple[Array, Array, Array, Array]: """Compute the statistics for the given `target` and `preds` arrays.""" - unique_mapping = (2 * target + preds) + 4 * flatten( - xp.arange(num_labels, device=apc.device(preds)), - ) - unique_mapping = unique_mapping[unique_mapping >= 0] - bins = bincount(unique_mapping, minlength=4 * num_labels) - return xp.reshape(bins, shape=(num_labels, 2, 2)) + sum_axis = (0, -1) + tp = squeeze_all(xp.sum(to_int((target == preds) & (target == 1)), axis=sum_axis)) + fn = squeeze_all(xp.sum(to_int((target != preds) & (target == 1)), axis=sum_axis)) + fp = squeeze_all(xp.sum(to_int((target != preds) & (target == 0)), axis=sum_axis)) + tn = squeeze_all(xp.sum(to_int((target == preds) & (target == 0)), axis=sum_axis)) + + return tn, fp, fn, tp def _multilabel_confusion_matrix_compute( - confmat: Array, + tn: Array, + fp: Array, + fn: Array, + tp: Array, + num_labels: int, normalize: Optional[str] = None, ) -> Array: """Compute the confusion matrix from the given stat scores.""" - xp = apc.array_namespace(confmat) + xp = apc.array_namespace(tn, fp, fn, tp) + + confmat = squeeze_all( + xp.reshape(xp.stack([tn, fp, fn, tp], axis=-1), shape=(-1, num_labels, 2, 2)), + ) + return _normalize_confusion_matrix(confmat, normalize=normalize, xp=xp) @@ -764,19 +769,17 @@ class over the number of true samples for each class. target, preds = _multilabel_confusion_matrix_format_arrays( target, preds, - num_labels, threshold=threshold, ignore_index=ignore_index, xp=xp, ) - confmat = _multilabel_confusion_matrix_update_state( - target, - preds, - num_labels, - xp=xp, - ) + tn, fp, fn, tp = _multilabel_confusion_matrix_update_state(target, preds, xp=xp) return _multilabel_confusion_matrix_compute( - confmat, + tn, + fp, + fn, + tp, + num_labels, normalize=normalize, ) diff --git a/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py b/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py index 4bb9cb927..80e358d02 100644 --- a/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py +++ b/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py @@ -14,6 +14,7 @@ _multiclass_confusion_matrix_update_state, _multiclass_confusion_matrix_validate_args, _multiclass_confusion_matrix_validate_arrays, + _multilabel_confusion_matrix_compute, _multilabel_confusion_matrix_format_arrays, _multilabel_confusion_matrix_update_state, _multilabel_confusion_matrix_validate_args, @@ -25,9 +26,10 @@ def _mcc_reduce(confmat: Array) -> Array: """Reduce an un-normalized confusion matrix into the matthews corrcoef.""" xp = apc.array_namespace(confmat) - # convert multilabel into binary confmat = xp.sum(confmat, axis=0) if confmat.ndim == 3 else confmat + print("confmat: ", confmat) + print("numel: ", apc.size(confmat)) if int(apc.size(confmat) or 0) == 4: # binary case tn, fp, fn, tp = xp.reshape(xp.astype(confmat, xp.float32), (-1,)) @@ -38,16 +40,25 @@ def _mcc_reduce(confmat: Array) -> Array: return xp.asarray(-1.0, dtype=xp.float32, device=apc.device(confmat)) # type: ignore[no-any-return] tk = xp.sum(confmat, axis=-1, dtype=xp.float32) # tn + fp and tp + fn + print("tk: ", tk) pk = xp.sum(confmat, axis=-2, dtype=xp.float32) # tn + fn and tp + fp + print("pk: ", pk) c = xp.astype(xp.linalg.trace(confmat), xp.float32) # tn and tp + print("c: ", c) s = xp.sum(confmat, dtype=xp.float32) # tn + tp + fn + fp + print("s: ", s) cov_ytyp = c * s - sum(tk * pk) + print("cov_ytyp: ", cov_ytyp) cov_ypyp = s**2 - sum(pk * pk) + print("cov_ypyp: ", cov_ypyp) cov_ytyt = s**2 - sum(tk * tk) + print("cov_ytyt: ", cov_ytyt) numerator = cov_ytyp + print("numerator: ", numerator) denom = cov_ypyp * cov_ytyt + print("denom: ", denom) if denom == 0 and int(apc.size(confmat) or 0) == 4: if tp == 0 or tn == 0: @@ -61,8 +72,11 @@ def _mcc_reduce(confmat: Array) -> Array: dtype=xp.float32, device=apc.device(confmat), ) + print("eps: ", eps) numerator = xp.sqrt(eps) * (a - b) + print("numerator: ", numerator) denom = (tp + fp + eps) * (tp + fn + eps) * (tn + fp + eps) * (tn + fn + eps) + print("denom: ", denom) elif denom == 0: return xp.asarray(0.0, dtype=xp.float32, device=apc.device(confmat)) # type: ignore[no-any-return] return numerator / xp.sqrt(denom) # type: ignore[no-any-return] @@ -334,16 +348,18 @@ def multilabel_mcc( target, preds = _multilabel_confusion_matrix_format_arrays( target, preds, - num_labels, threshold=threshold, ignore_index=ignore_index, xp=xp, ) - confmat = _multilabel_confusion_matrix_update_state( - target, - preds, + tn, fp, fn, tp = _multilabel_confusion_matrix_update_state(target, preds, xp=xp) + + confmat = _multilabel_confusion_matrix_compute( + tn, + fp, + fn, + tp, num_labels, - xp=xp, + normalize=None, ) - return _mcc_reduce(confmat) diff --git a/cyclops/evaluate/metrics/experimental/matthews_corr_coef.py b/cyclops/evaluate/metrics/experimental/matthews_corr_coef.py index bbfc4856e..a05980dcf 100644 --- a/cyclops/evaluate/metrics/experimental/matthews_corr_coef.py +++ b/cyclops/evaluate/metrics/experimental/matthews_corr_coef.py @@ -9,6 +9,7 @@ ) from cyclops.evaluate.metrics.experimental.functional.confusion_matrix import ( _binary_confusion_matrix_compute, + _multilabel_confusion_matrix_compute, ) from cyclops.evaluate.metrics.experimental.functional.matthews_corr_coef import ( _mcc_reduce, @@ -175,4 +176,13 @@ def __init__( def _compute_metric(self) -> Array: """Compute the confusion matrix.""" - return _mcc_reduce(self.confmat) # type: ignore + tn, fp, fn, tp = self._final_state() + confmat = _multilabel_confusion_matrix_compute( + tp=tp, + fp=fp, + tn=tn, + fn=fn, + num_labels=self.num_labels, + normalize=self.normalize, + ) + return _mcc_reduce(confmat) From 4e17d12fae61fe6ebbad00f551819c434bddadfb Mon Sep 17 00:00:00 2001 From: Franklin <41602287+fcogidi@users.noreply.github.com> Date: Fri, 16 Feb 2024 13:00:38 -0500 Subject: [PATCH 11/14] fix tests --- .../functional/matthews_corr_coef.py | 14 ---- .../experimental/test_confusion_matrix.py | 7 +- .../metrics/experimental/test_f_score.py | 78 ++++++++++++------- .../experimental/test_matthews_corr_coef.py | 7 +- .../test_negative_predicitve_value.py | 10 ++- .../experimental/test_precision_recall.py | 10 ++- .../metrics/experimental/test_specificity.py | 10 ++- .../evaluate/metrics/experimental/testers.py | 9 +++ 8 files changed, 98 insertions(+), 47 deletions(-) diff --git a/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py b/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py index 80e358d02..07f542eab 100644 --- a/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py +++ b/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py @@ -28,8 +28,6 @@ def _mcc_reduce(confmat: Array) -> Array: xp = apc.array_namespace(confmat) # convert multilabel into binary confmat = xp.sum(confmat, axis=0) if confmat.ndim == 3 else confmat - print("confmat: ", confmat) - print("numel: ", apc.size(confmat)) if int(apc.size(confmat) or 0) == 4: # binary case tn, fp, fn, tp = xp.reshape(xp.astype(confmat, xp.float32), (-1,)) @@ -40,25 +38,16 @@ def _mcc_reduce(confmat: Array) -> Array: return xp.asarray(-1.0, dtype=xp.float32, device=apc.device(confmat)) # type: ignore[no-any-return] tk = xp.sum(confmat, axis=-1, dtype=xp.float32) # tn + fp and tp + fn - print("tk: ", tk) pk = xp.sum(confmat, axis=-2, dtype=xp.float32) # tn + fn and tp + fp - print("pk: ", pk) c = xp.astype(xp.linalg.trace(confmat), xp.float32) # tn and tp - print("c: ", c) s = xp.sum(confmat, dtype=xp.float32) # tn + tp + fn + fp - print("s: ", s) cov_ytyp = c * s - sum(tk * pk) - print("cov_ytyp: ", cov_ytyp) cov_ypyp = s**2 - sum(pk * pk) - print("cov_ypyp: ", cov_ypyp) cov_ytyt = s**2 - sum(tk * tk) - print("cov_ytyt: ", cov_ytyt) numerator = cov_ytyp - print("numerator: ", numerator) denom = cov_ypyp * cov_ytyt - print("denom: ", denom) if denom == 0 and int(apc.size(confmat) or 0) == 4: if tp == 0 or tn == 0: @@ -72,11 +61,8 @@ def _mcc_reduce(confmat: Array) -> Array: dtype=xp.float32, device=apc.device(confmat), ) - print("eps: ", eps) numerator = xp.sqrt(eps) * (a - b) - print("numerator: ", numerator) denom = (tp + fp + eps) * (tp + fn + eps) * (tn + fp + eps) * (tn + fn + eps) - print("denom: ", denom) elif denom == 0: return xp.asarray(0.0, dtype=xp.float32, device=apc.device(confmat)) # type: ignore[no-any-return] return numerator / xp.sqrt(denom) # type: ignore[no-any-return] diff --git a/tests/cyclops/evaluate/metrics/experimental/test_confusion_matrix.py b/tests/cyclops/evaluate/metrics/experimental/test_confusion_matrix.py index 2d94c03fa..860bf4ba3 100644 --- a/tests/cyclops/evaluate/metrics/experimental/test_confusion_matrix.py +++ b/tests/cyclops/evaluate/metrics/experimental/test_confusion_matrix.py @@ -1,4 +1,5 @@ """Test confusion matrix metrics.""" + from functools import partial import array_api_compat as apc @@ -390,7 +391,7 @@ def test_multilabel_confusion_matrix_class_with_numpy_array_api_arrays( ) @pytest.mark.integration_test() # machine for integration tests has GPU - @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=array_api_compat.torch)) @pytest.mark.parametrize("normalize", [None, "true", "pred", "all"]) @pytest.mark.parametrize("ignore_index", [None, 0, -1]) def test_multilabel_confusion_matrix_class_with_torch_tensors( @@ -405,6 +406,8 @@ def test_multilabel_confusion_matrix_class_with_torch_tensors( if ignore_index is not None: target = _inject_ignore_index(target, ignore_index) + device = "cuda" if torch.cuda.is_available() else "cpu" + self.run_metric_class_implementation_test( target, preds, @@ -421,4 +424,6 @@ def test_multilabel_confusion_matrix_class_with_torch_tensors( "normalize": normalize, "ignore_index": ignore_index, }, + device=device, + use_device_for_ref=True, ) diff --git a/tests/cyclops/evaluate/metrics/experimental/test_f_score.py b/tests/cyclops/evaluate/metrics/experimental/test_f_score.py index 5c9afe687..73f83dda5 100644 --- a/tests/cyclops/evaluate/metrics/experimental/test_f_score.py +++ b/tests/cyclops/evaluate/metrics/experimental/test_f_score.py @@ -1,4 +1,5 @@ """Tests for the F-score metric.""" + from functools import partial from typing import Literal, Optional @@ -79,9 +80,11 @@ def test_binary_fbeta_score_function_with_numpy_array_api_arrays( self.run_metric_function_implementation_test( target, preds, - metric_function=binary_f1_score - if beta == 1.0 - else partial(binary_fbeta_score, beta=beta), + metric_function=( + binary_f1_score + if beta == 1.0 + else partial(binary_fbeta_score, beta=beta) + ), metric_args={"threshold": THRESHOLD, "ignore_index": ignore_index}, reference_metric=partial( _binary_fbeta_score_reference, @@ -119,9 +122,9 @@ def test_binary_fbeta_score_class_with_numpy_array_api_arrays( self.run_metric_class_implementation_test( target, preds, - metric_class=BinaryF1Score - if beta == 1.0 - else partial(BinaryFBetaScore, beta=beta), + metric_class=( + BinaryF1Score if beta == 1.0 else partial(BinaryFBetaScore, beta=beta) + ), metric_args={"threshold": THRESHOLD, "ignore_index": ignore_index}, reference_metric=partial( _binary_fbeta_score_reference, @@ -162,9 +165,9 @@ def test_binary_fbeta_class_with_torch_tensors( self.run_metric_class_implementation_test( target, preds, - metric_class=BinaryF1Score - if beta == 1.0 - else partial(BinaryFBetaScore, beta=beta), + metric_class=( + BinaryF1Score if beta == 1.0 else partial(BinaryFBetaScore, beta=beta) + ), metric_args={"threshold": THRESHOLD, "ignore_index": ignore_index}, reference_metric=partial( _binary_fbeta_score_reference, @@ -241,9 +244,11 @@ def test_multiclass_fbeta_score_with_numpy_array_api_arrays( self.run_metric_function_implementation_test( target, preds, - metric_function=multiclass_f1_score - if beta == 1.0 - else partial(multiclass_fbeta_score, beta=beta), + metric_function=( + multiclass_f1_score + if beta == 1.0 + else partial(multiclass_fbeta_score, beta=beta) + ), metric_args={ "num_classes": NUM_CLASSES, "top_k": top_k, @@ -292,9 +297,11 @@ def test_multiclass_fbeta_score_class_with_numpy_array_api_arrays( self.run_metric_class_implementation_test( target, preds, - metric_class=MulticlassF1Score - if beta == 1.0 - else partial(MulticlassFBetaScore, beta=beta), + metric_class=( + MulticlassF1Score + if beta == 1.0 + else partial(MulticlassFBetaScore, beta=beta) + ), reference_metric=partial( _multiclass_fbeta_score_reference, beta=beta, @@ -346,9 +353,11 @@ def test_multiclass_fbeta_score_class_with_torch_tensors( self.run_metric_class_implementation_test( target, preds, - metric_class=MulticlassF1Score - if beta == 1.0 - else partial(MulticlassFBetaScore, beta=beta), + metric_class=( + MulticlassF1Score + if beta == 1.0 + else partial(MulticlassFBetaScore, beta=beta) + ), reference_metric=partial( _multiclass_fbeta_score_reference, beta=beta, @@ -411,9 +420,11 @@ def test_multilabel_fbeta_score_with_numpy_array_api_arrays( self.run_metric_function_implementation_test( target, preds, - metric_function=multilabel_f1_score - if beta == 1.0 - else partial(multilabel_fbeta_score, beta=beta), + metric_function=( + multilabel_f1_score + if beta == 1.0 + else partial(multilabel_fbeta_score, beta=beta) + ), reference_metric=partial( _multilabel_fbeta_score_reference, beta=beta, @@ -446,9 +457,11 @@ def test_multilabel_fbeta_score_class_with_numpy_array_api_arrays( self.run_metric_class_implementation_test( target, preds, - metric_class=MultilabelF1Score - if beta == 1.0 - else partial(MultilabelFBetaScore, beta=beta), + metric_class=( + MultilabelF1Score + if beta == 1.0 + else partial(MultilabelFBetaScore, beta=beta) + ), reference_metric=partial( _multilabel_fbeta_score_reference, beta=beta, @@ -466,7 +479,7 @@ def test_multilabel_fbeta_score_class_with_numpy_array_api_arrays( ) @pytest.mark.integration_test() # machine for integration tests has GPU - @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=array_api_compat.torch)) @pytest.mark.parametrize("average", [None, "micro", "macro", "weighted"]) @pytest.mark.parametrize("ignore_index", [None, 0, -1]) def test_multilabel_fbeta_score_class_with_torch_tensors( @@ -479,12 +492,19 @@ def test_multilabel_fbeta_score_class_with_torch_tensors( """Test class for multilabel fbeta score with torch tensors.""" target, preds = inputs + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + device = "cuda" if torch.cuda.is_available() else "cpu" + self.run_metric_class_implementation_test( target, preds, - metric_class=MultilabelF1Score - if beta == 1.0 - else partial(MultilabelFBetaScore, beta=beta), + metric_class=( + MultilabelF1Score + if beta == 1.0 + else partial(MultilabelFBetaScore, beta=beta) + ), reference_metric=partial( _multilabel_fbeta_score_reference, beta=beta, @@ -499,6 +519,8 @@ def test_multilabel_fbeta_score_class_with_torch_tensors( "average": average, "ignore_index": ignore_index, }, + device=device, + use_device_for_ref=True, ) diff --git a/tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py b/tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py index 831bababf..8d43d66cb 100644 --- a/tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py +++ b/tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py @@ -1,4 +1,5 @@ """Test matthews correlation coefficient metrics.""" + from functools import partial import array_api_compat as apc @@ -348,7 +349,7 @@ def test_multilabel_mcc_class_with_numpy_array_api_arrays( ) @pytest.mark.integration_test() # machine for integration tests has GPU - @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=array_api_compat.torch)) @pytest.mark.parametrize("ignore_index", [None, 0, -1]) def test_multilabel_mcc_class_with_torch_tensors( self, @@ -361,6 +362,8 @@ def test_multilabel_mcc_class_with_torch_tensors( if ignore_index is not None: target = _inject_ignore_index(target, ignore_index) + device = "cuda" if torch.cuda.is_available() else "cpu" + self.run_metric_class_implementation_test( target, preds, @@ -375,4 +378,6 @@ def test_multilabel_mcc_class_with_torch_tensors( "num_labels": NUM_LABELS, "ignore_index": ignore_index, }, + device=device, + use_device_for_ref=True, ) diff --git a/tests/cyclops/evaluate/metrics/experimental/test_negative_predicitve_value.py b/tests/cyclops/evaluate/metrics/experimental/test_negative_predicitve_value.py index b07f2e7ea..6d0d057be 100644 --- a/tests/cyclops/evaluate/metrics/experimental/test_negative_predicitve_value.py +++ b/tests/cyclops/evaluate/metrics/experimental/test_negative_predicitve_value.py @@ -1,4 +1,5 @@ """Test negative predictive value.""" + from functools import partial from typing import Literal, Optional @@ -476,7 +477,7 @@ def test_multilabel_npv_class_with_numpy_array_api_arrays( ) @pytest.mark.integration_test() # machine for integration tests has GPU - @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=array_api_compat.torch)) @pytest.mark.parametrize("average", [None, "micro", "macro", "weighted"]) @pytest.mark.parametrize("ignore_index", [None, 0, -1]) def test_multilabel_npv_class_with_torch_tensors( @@ -488,6 +489,11 @@ def test_multilabel_npv_class_with_torch_tensors( """Test class for multilabel negative predictive value with torch tensors.""" target, preds = inputs + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + device = "cuda" if torch.cuda.is_available() else "cpu" + self.run_metric_class_implementation_test( target, preds, @@ -505,6 +511,8 @@ def test_multilabel_npv_class_with_torch_tensors( "average": average, "ignore_index": ignore_index, }, + device=device, + use_device_for_ref=True, ) diff --git a/tests/cyclops/evaluate/metrics/experimental/test_precision_recall.py b/tests/cyclops/evaluate/metrics/experimental/test_precision_recall.py index 14c3c3a96..8b24b2d75 100644 --- a/tests/cyclops/evaluate/metrics/experimental/test_precision_recall.py +++ b/tests/cyclops/evaluate/metrics/experimental/test_precision_recall.py @@ -1,4 +1,5 @@ """Test precision recall metrics.""" + from functools import partial from typing import Literal, Optional @@ -730,7 +731,7 @@ def test_multilabel_precision_class_with_numpy_array_api_arrays( ) @pytest.mark.integration_test() # machine for integration tests has GPU - @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=array_api_compat.torch)) @pytest.mark.parametrize("average", [None, "micro", "macro", "weighted"]) @pytest.mark.parametrize("ignore_index", [None, 0, -1]) def test_multilabel_precision_class_with_torch_tensors( @@ -742,6 +743,11 @@ def test_multilabel_precision_class_with_torch_tensors( """Test class for multilabel precision with torch tensors.""" target, preds = inputs + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + device = "cuda" if torch.cuda.is_available() else "cpu" + self.run_metric_class_implementation_test( target, preds, @@ -760,6 +766,8 @@ def test_multilabel_precision_class_with_torch_tensors( "average": average, "ignore_index": ignore_index, }, + device=device, + use_device_for_ref=True, ) diff --git a/tests/cyclops/evaluate/metrics/experimental/test_specificity.py b/tests/cyclops/evaluate/metrics/experimental/test_specificity.py index 035edbada..b4f40b12c 100644 --- a/tests/cyclops/evaluate/metrics/experimental/test_specificity.py +++ b/tests/cyclops/evaluate/metrics/experimental/test_specificity.py @@ -1,4 +1,5 @@ """Test specificity.""" + from functools import partial from typing import Literal, Optional @@ -417,7 +418,7 @@ def test_multilabel_specificity_class_with_numpy_array_api_arrays( ) @pytest.mark.integration_test() # machine for integration tests has GPU - @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=array_api_compat.torch)) @pytest.mark.parametrize("average", [None, "micro", "macro", "weighted"]) @pytest.mark.parametrize("ignore_index", [None, 0, -1]) def test_multilabel_specificity_class_with_torch_tensors( @@ -429,6 +430,11 @@ def test_multilabel_specificity_class_with_torch_tensors( """Test class for multilabel specificity with torch tensors.""" target, preds = inputs + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + device = "cuda" if torch.cuda.is_available() else "cpu" + self.run_metric_class_implementation_test( target, preds, @@ -446,6 +452,8 @@ def test_multilabel_specificity_class_with_torch_tensors( "average": average, "ignore_index": ignore_index, }, + device=device, + use_device_for_ref=True, ) diff --git a/tests/cyclops/evaluate/metrics/experimental/testers.py b/tests/cyclops/evaluate/metrics/experimental/testers.py index 4d58305d3..77d87436e 100644 --- a/tests/cyclops/evaluate/metrics/experimental/testers.py +++ b/tests/cyclops/evaluate/metrics/experimental/testers.py @@ -150,6 +150,15 @@ def _class_impl_test( # noqa: PLR0912 preds=apc.to_device(total_preds, device if use_device_for_ref else "cpu"), ) + # DEBUG + metric.reset() + print( + metric( + target=apc.to_device(total_target, device if use_device_for_ref else "cpu"), + preds=apc.to_device(total_preds, device if use_device_for_ref else "cpu"), + ), + ) + # assert after aggregation if isinstance(ref_result, dict): for key in ref_result: From 668ac1fbda3564d092b6fb5b584d2019e8cb2d77 Mon Sep 17 00:00:00 2001 From: fcogidi <41602287+fcogidi@users.noreply.github.com> Date: Tue, 20 Feb 2024 17:44:42 -0500 Subject: [PATCH 12/14] use float64 for internal computations --- .../functional/matthews_corr_coef.py | 24 +++++++++++-------- .../experimental/matthews_corr_coef.py | 12 ++++++---- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py b/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py index 07f542eab..794645e60 100644 --- a/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py +++ b/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py @@ -30,17 +30,17 @@ def _mcc_reduce(confmat: Array) -> Array: confmat = xp.sum(confmat, axis=0) if confmat.ndim == 3 else confmat if int(apc.size(confmat) or 0) == 4: # binary case - tn, fp, fn, tp = xp.reshape(xp.astype(confmat, xp.float32), (-1,)) + tn, fp, fn, tp = xp.reshape(xp.astype(confmat, xp.float64), (-1,)) if tp + tn != 0 and fp + fn == 0: return xp.asarray(1.0, dtype=xp.float32, device=apc.device(confmat)) # type: ignore[no-any-return] if tp + tn == 0 and fp + fn != 0: return xp.asarray(-1.0, dtype=xp.float32, device=apc.device(confmat)) # type: ignore[no-any-return] - tk = xp.sum(confmat, axis=-1, dtype=xp.float32) # tn + fp and tp + fn - pk = xp.sum(confmat, axis=-2, dtype=xp.float32) # tn + fn and tp + fp - c = xp.astype(xp.linalg.trace(confmat), xp.float32) # tn and tp - s = xp.sum(confmat, dtype=xp.float32) # tn + tp + fn + fp + tk = xp.sum(confmat, axis=-1, dtype=xp.float64) # tn + fp and tp + fn + pk = xp.sum(confmat, axis=-2, dtype=xp.float64) # tn + fn and tp + fp + c = xp.astype(xp.linalg.trace(confmat), xp.float64) # tn and tp + s = xp.sum(confmat, dtype=xp.float64) # tn + tp + fn + fp cov_ytyp = c * s - sum(tk * pk) cov_ypyp = s**2 - sum(pk * pk) @@ -65,7 +65,7 @@ def _mcc_reduce(confmat: Array) -> Array: denom = (tp + fp + eps) * (tp + fn + eps) * (tn + fp + eps) * (tn + fn + eps) elif denom == 0: return xp.asarray(0.0, dtype=xp.float32, device=apc.device(confmat)) # type: ignore[no-any-return] - return numerator / xp.sqrt(denom) # type: ignore[no-any-return] + return xp.astype(numerator / xp.sqrt(denom), xp.float64) # type: ignore[no-any-return] def binary_mcc( @@ -214,10 +214,14 @@ def multiclass_mcc( >>> multiclass_mcc(target, preds, num_classes=3) Array(0.7, dtype=float32) >>> target = anp.asarray([2, 1, 0, 0]) - >>> preds = anp.asarray([[0.16, 0.26, 0.58], - ... [0.22, 0.61, 0.17], - ... [0.71, 0.09, 0.20], - ... [0.05, 0.82, 0.13]]) + >>> preds = anp.asarray( + ... [ + ... [0.16, 0.26, 0.58], + ... [0.22, 0.61, 0.17], + ... [0.71, 0.09, 0.20], + ... [0.05, 0.82, 0.13], + ... ] + ... ) >>> multiclass_mcc(target, preds, num_classes=3) Array(0.7, dtype=float32) diff --git a/cyclops/evaluate/metrics/experimental/matthews_corr_coef.py b/cyclops/evaluate/metrics/experimental/matthews_corr_coef.py index a05980dcf..ce9c75c20 100644 --- a/cyclops/evaluate/metrics/experimental/matthews_corr_coef.py +++ b/cyclops/evaluate/metrics/experimental/matthews_corr_coef.py @@ -94,10 +94,14 @@ class MulticlassMCC(MulticlassConfusionMatrix, registry_key="multiclass_mcc"): >>> metric(target, preds) Array(0.7, dtype=float32) >>> target = anp.asarray([2, 1, 0, 0]) - >>> preds = anp.asarray([[0.16, 0.26, 0.58], - ... [0.22, 0.61, 0.17], - ... [0.71, 0.09, 0.20], - ... [0.05, 0.82, 0.13]]) + >>> preds = anp.asarray( + ... [ + ... [0.16, 0.26, 0.58], + ... [0.22, 0.61, 0.17], + ... [0.71, 0.09, 0.20], + ... [0.05, 0.82, 0.13], + ... ] + ... ) >>> metric = MulticlassMCC(num_classes=3) >>> metric(target, preds) Array(0.7, dtype=float32) From 18b06864bdf37670fc53a7476638f96da1ed8839 Mon Sep 17 00:00:00 2001 From: fcogidi <41602287+fcogidi@users.noreply.github.com> Date: Tue, 20 Feb 2024 17:54:45 -0500 Subject: [PATCH 13/14] Refactor test files and remove debug print statement --- .../metrics/experimental/test_matthews_corr_coef.py | 2 ++ tests/cyclops/evaluate/metrics/experimental/testers.py | 9 --------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py b/tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py index 8d43d66cb..44aecbdbf 100644 --- a/tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py +++ b/tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py @@ -290,6 +290,8 @@ def _multilabel_mcc_reference( class TestMultilabelMCC(MetricTester): """Test multilabel matthews correlation coefficient function and class.""" + atol: float = 4e-8 + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) @pytest.mark.parametrize("ignore_index", [None, 0, -1]) def test_multilabel_mcc_with_numpy_array_api_arrays( diff --git a/tests/cyclops/evaluate/metrics/experimental/testers.py b/tests/cyclops/evaluate/metrics/experimental/testers.py index 77d87436e..4d58305d3 100644 --- a/tests/cyclops/evaluate/metrics/experimental/testers.py +++ b/tests/cyclops/evaluate/metrics/experimental/testers.py @@ -150,15 +150,6 @@ def _class_impl_test( # noqa: PLR0912 preds=apc.to_device(total_preds, device if use_device_for_ref else "cpu"), ) - # DEBUG - metric.reset() - print( - metric( - target=apc.to_device(total_target, device if use_device_for_ref else "cpu"), - preds=apc.to_device(total_preds, device if use_device_for_ref else "cpu"), - ), - ) - # assert after aggregation if isinstance(ref_result, dict): for key in ref_result: From 7c33227a329ed596568a63dee43a1bb80b069ad1 Mon Sep 17 00:00:00 2001 From: fcogidi <41602287+fcogidi@users.noreply.github.com> Date: Tue, 20 Feb 2024 18:10:52 -0500 Subject: [PATCH 14/14] Fix data type in Matthews correlation coefficient calculation --- .../metrics/experimental/functional/matthews_corr_coef.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py b/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py index 794645e60..89ed7c07e 100644 --- a/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py +++ b/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py @@ -65,7 +65,7 @@ def _mcc_reduce(confmat: Array) -> Array: denom = (tp + fp + eps) * (tp + fn + eps) * (tn + fp + eps) * (tn + fn + eps) elif denom == 0: return xp.asarray(0.0, dtype=xp.float32, device=apc.device(confmat)) # type: ignore[no-any-return] - return xp.astype(numerator / xp.sqrt(denom), xp.float64) # type: ignore[no-any-return] + return xp.astype(numerator / xp.sqrt(denom), xp.float32) # type: ignore[no-any-return] def binary_mcc(