diff --git a/cyclops/evaluate/metrics/experimental/__init__.py b/cyclops/evaluate/metrics/experimental/__init__.py index 3a5b9974a..51ab7a21e 100644 --- a/cyclops/evaluate/metrics/experimental/__init__.py +++ b/cyclops/evaluate/metrics/experimental/__init__.py @@ -29,6 +29,11 @@ ) from cyclops.evaluate.metrics.experimental.mae import MeanAbsoluteError from cyclops.evaluate.metrics.experimental.mape import MeanAbsolutePercentageError +from cyclops.evaluate.metrics.experimental.matthews_corr_coef import ( + BinaryMCC, + MulticlassMCC, + MultilabelMCC, +) from cyclops.evaluate.metrics.experimental.metric_dict import MetricDict from cyclops.evaluate.metrics.experimental.mse import MeanSquaredError from cyclops.evaluate.metrics.experimental.negative_predictive_value import ( diff --git a/cyclops/evaluate/metrics/experimental/confusion_matrix.py b/cyclops/evaluate/metrics/experimental/confusion_matrix.py index 9a14488f4..3140c89d9 100644 --- a/cyclops/evaluate/metrics/experimental/confusion_matrix.py +++ b/cyclops/evaluate/metrics/experimental/confusion_matrix.py @@ -1,4 +1,5 @@ """Confusion matrix.""" + from types import ModuleType from typing import Any, Optional, Tuple, Union diff --git a/cyclops/evaluate/metrics/experimental/functional/__init__.py b/cyclops/evaluate/metrics/experimental/functional/__init__.py index 1a2e5902b..56b7e825e 100644 --- a/cyclops/evaluate/metrics/experimental/functional/__init__.py +++ b/cyclops/evaluate/metrics/experimental/functional/__init__.py @@ -31,6 +31,11 @@ from cyclops.evaluate.metrics.experimental.functional.mape import ( mean_absolute_percentage_error, ) +from cyclops.evaluate.metrics.experimental.functional.matthews_corr_coef import ( + binary_mcc, + multiclass_mcc, + multilabel_mcc, +) from cyclops.evaluate.metrics.experimental.functional.mse import mean_squared_error from cyclops.evaluate.metrics.experimental.functional.negative_predictive_value import ( binary_npv, diff --git 
a/cyclops/evaluate/metrics/experimental/functional/confusion_matrix.py b/cyclops/evaluate/metrics/experimental/functional/confusion_matrix.py index 23faa208d..19b53b48c 100644 --- a/cyclops/evaluate/metrics/experimental/functional/confusion_matrix.py +++ b/cyclops/evaluate/metrics/experimental/functional/confusion_matrix.py @@ -1,4 +1,5 @@ """Functions for computing the confusion matrix for classification tasks.""" + # mypy: disable-error-code="no-any-return" from types import ModuleType from typing import Literal, Optional, Tuple, Union diff --git a/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py b/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py new file mode 100644 index 000000000..89ed7c07e --- /dev/null +++ b/cyclops/evaluate/metrics/experimental/functional/matthews_corr_coef.py @@ -0,0 +1,355 @@ +"""Functional API for the matthews correlation coefficient (MCC) metric.""" + +from typing import Optional, Tuple, Union + +import array_api_compat as apc + +from cyclops.evaluate.metrics.experimental.functional.confusion_matrix import ( + _binary_confusion_matrix_compute, + _binary_confusion_matrix_format_arrays, + _binary_confusion_matrix_update_state, + _binary_confusion_matrix_validate_args, + _binary_confusion_matrix_validate_arrays, + _multiclass_confusion_matrix_format_arrays, + _multiclass_confusion_matrix_update_state, + _multiclass_confusion_matrix_validate_args, + _multiclass_confusion_matrix_validate_arrays, + _multilabel_confusion_matrix_compute, + _multilabel_confusion_matrix_format_arrays, + _multilabel_confusion_matrix_update_state, + _multilabel_confusion_matrix_validate_args, + _multilabel_confusion_matrix_validate_arrays, +) +from cyclops.evaluate.metrics.experimental.utils.types import Array + + +def _mcc_reduce(confmat: Array) -> Array: + """Reduce an un-normalized confusion matrix into the matthews corrcoef.""" + xp = apc.array_namespace(confmat) + # convert multilabel into binary + confmat = 
xp.sum(confmat, axis=0) if confmat.ndim == 3 else confmat + + if int(apc.size(confmat) or 0) == 4: # binary case + tn, fp, fn, tp = xp.reshape(xp.astype(confmat, xp.float64), (-1,)) + if tp + tn != 0 and fp + fn == 0: + return xp.asarray(1.0, dtype=xp.float32, device=apc.device(confmat)) # type: ignore[no-any-return] + + if tp + tn == 0 and fp + fn != 0: + return xp.asarray(-1.0, dtype=xp.float32, device=apc.device(confmat)) # type: ignore[no-any-return] + + tk = xp.sum(confmat, axis=-1, dtype=xp.float64) # tn + fp and tp + fn + pk = xp.sum(confmat, axis=-2, dtype=xp.float64) # tn + fn and tp + fp + c = xp.astype(xp.linalg.trace(confmat), xp.float64) # tn and tp + s = xp.sum(confmat, dtype=xp.float64) # tn + tp + fn + fp + + cov_ytyp = c * s - sum(tk * pk) + cov_ypyp = s**2 - sum(pk * pk) + cov_ytyt = s**2 - sum(tk * tk) + + numerator = cov_ytyp + denom = cov_ypyp * cov_ytyt + + if denom == 0 and int(apc.size(confmat) or 0) == 4: + if tp == 0 or tn == 0: + a = tp + tn + + if fp == 0 or fn == 0: + b = fp + fn + + eps = xp.asarray( + xp.finfo(xp.float32).eps, + dtype=xp.float32, + device=apc.device(confmat), + ) + numerator = xp.sqrt(eps) * (a - b) + denom = (tp + fp + eps) * (tp + fn + eps) * (tn + fp + eps) * (tn + fn + eps) + elif denom == 0: + return xp.asarray(0.0, dtype=xp.float32, device=apc.device(confmat)) # type: ignore[no-any-return] + return xp.astype(numerator / xp.sqrt(denom), xp.float32) # type: ignore[no-any-return] + + +def binary_mcc( + target: Array, + preds: Array, + threshold: float = 0.5, + ignore_index: Optional[int] = None, +) -> Array: + """Compute the matthews correlation coefficient for binary classification. + + Parameters + ---------- + target : Array + An array object that is compatible with the Python array API standard + and contains the ground truth labels. The expected shape of the array + is `(N, ...)`, where `N` is the number of samples. 
+ preds : Array + An array object that is compatible with the Python array API standard and + contains the predictions of a binary classifier. The expected shape of the + array is `(N, ...)` where `N` is the number of samples. If `preds` contains + floating point values that are not in the range `[0, 1]`, a sigmoid function + will be applied to each value before thresholding. + threshold : float, default=0.5 + The threshold to use when converting probabilities to binary predictions. + ignore_index : int, optional, default=None + Specifies a target value that is ignored and does not contribute to the + metric. If `None`, ignore nothing. + + Returns + ------- + Array + The matthews correlation coefficient. + + Raises + ------ + ValueError + If `target` and `preds` have different shapes. + ValueError + If `target` and `preds` are not array-API-compatible. + ValueError + If `target` or `preds` are empty. + ValueError + If `target` or `preds` are not numeric arrays. + ValueError + If `threshold` is not a float in the [0,1] range. + ValueError + If `normalize` is not one of `'pred'`, `'true'`, `'all'`, `'none'`, or `None`. + ValueError + If `ignore_index` is not `None` or an integer. 
+ + Examples + -------- + >>> import numpy.array_api as anp + >>> from cyclops.evaluate.metrics.experimental.functional import binary_mcc + >>> target = anp.asarray([0, 1, 0, 1, 0, 1]) + >>> preds = anp.asarray([0, 0, 1, 1, 0, 1]) + >>> binary_mcc(target, preds) + Array(0.33333334, dtype=float32) + >>> target = anp.asarray([0, 1, 0, 1, 0, 1]) + >>> preds = anp.asarray([0.11, 0.22, 0.84, 0.73, 0.33, 0.92]) + >>> binary_mcc(target, preds) + Array(0.33333334, dtype=float32) + + """ + _binary_confusion_matrix_validate_args( + threshold=threshold, + normalize=None, + ignore_index=ignore_index, + ) + xp = _binary_confusion_matrix_validate_arrays(target, preds, ignore_index) + + target, preds = _binary_confusion_matrix_format_arrays( + target, + preds, + threshold, + ignore_index, + xp=xp, + ) + tn, fp, fn, tp = _binary_confusion_matrix_update_state(target, preds, xp=xp) + + confmat = _binary_confusion_matrix_compute(tn, fp, fn, tp, normalize=None) + return _mcc_reduce(confmat) + + +def multiclass_mcc( + target: Array, + preds: Array, + num_classes: int, + ignore_index: Optional[Union[int, Tuple[int]]] = None, +) -> Array: + """Compute the matthews correlation coefficient for multiclass classification. + + Parameters + ---------- + target : Array + The target array of shape `(N, ...)`, where `N` is the number of samples. + preds : Array + The prediction array with shape `(N, ...)`, for integer inputs, or + `(N, C, ...)`, for float inputs, where `N` is the number of samples and + `C` is the number of classes. + num_classes : int + The number of classes. + ignore_index : int, Tuple[int], optional, default=None + Specifies a target value(s) that is ignored and does not contribute to the + metric. If `None`, ignore nothing. + + Returns + ------- + Array + The matthews correlation coefficient. + + Raises + ------ + ValueError + If `target` and `preds` are not array-API-compatible. + ValueError + If `target` or `preds` are empty. 
+ ValueError + If `target` or `preds` are not numeric arrays. + ValueError + If `num_classes` is not an integer larger than 1. + ValueError + If `normalize` is not one of `'pred'`, `'true'`, `'all'`, `'none'`, or `None`. + ValueError + If `ignore_index` is not `None`, an integer or a tuple of integers. + ValueError + If `preds` contains floats but `target` does not have one dimension less than + `preds`. + ValueError + If the second dimension of `preds` is not equal to `num_classes`. + ValueError + If when `target` has one dimension less than `preds`, the shape of `preds` is + not `(N, C, ...)` while the shape of `target` is `(N, ...)`. + ValueError + If when `target` and `preds` have the same number of dimensions, they + do not have the same shape. + RuntimeError + If `target` contains values that are not in the range [0, `num_classes`). + + Examples + -------- + >>> import numpy.array_api as anp + >>> from cyclops.evaluate.metrics.experimental.functional import multiclass_mcc + >>> target = anp.asarray([2, 1, 0, 0]) + >>> preds = anp.asarray([2, 1, 0, 1]) + >>> multiclass_mcc(target, preds, num_classes=3) + Array(0.7, dtype=float32) + >>> target = anp.asarray([2, 1, 0, 0]) + >>> preds = anp.asarray( + ... [ + ... [0.16, 0.26, 0.58], + ... [0.22, 0.61, 0.17], + ... [0.71, 0.09, 0.20], + ... [0.05, 0.82, 0.13], + ... ] + ... 
) + >>> multiclass_mcc(target, preds, num_classes=3) + Array(0.7, dtype=float32) + + """ + _multiclass_confusion_matrix_validate_args( + num_classes, + normalize=None, + ignore_index=ignore_index, + ) + xp = _multiclass_confusion_matrix_validate_arrays( + target, + preds, + num_classes, + ignore_index=ignore_index, + ) + + target, preds = _multiclass_confusion_matrix_format_arrays( + target, + preds, + ignore_index=ignore_index, + xp=xp, + ) + confmat = _multiclass_confusion_matrix_update_state( + target, + preds, + num_classes, + xp=xp, + ) + return _mcc_reduce(confmat) + + +def multilabel_mcc( + target: Array, + preds: Array, + num_labels: int, + threshold: float = 0.5, + ignore_index: Optional[int] = None, +) -> Array: + """Compute the matthews correlation coefficient for multilabel classification. + + Parameters + ---------- + target : Array + The target array of shape `(N, L, ...)`, where `N` is the number of samples + and `L` is the number of labels. + preds : Array + The prediction array of shape `(N, L, ...)`, where `N` is the number of + samples and `L` is the number of labels. If `preds` contains floats that + are not in the range [0,1], they will be converted to probabilities using + the sigmoid function. + num_labels : int + The number of labels. + threshold : float, default=0.5 + The threshold to use for binarizing the predictions. + ignore_index : int, optional, default=None + Specifies a target value that is ignored and does not contribute to the + metric. If `None`, ignore nothing. + + Returns + ------- + Array + The matthews correlation coefficient. + + Raises + ------ + ValueError + If `target` and `preds` are not array-API-compatible. + ValueError + If `target` or `preds` are empty. + ValueError + If `target` or `preds` are not numeric arrays. + ValueError + If `threshold` is not a float in the [0,1] range. + ValueError + If `normalize` is not one of `'pred'`, `'true'`, `'all'`, `'none'`, or `None`. 
+ ValueError + If `ignore_index` is not `None` or a non-negative integer. + ValueError + If `num_labels` is not an integer larger than 1. + ValueError + If `target` and `preds` do not have the same shape. + ValueError + If the second dimension of `preds` is not equal to `num_labels`. + RuntimeError + If `target` contains values that are not in the range [0, 1]. + + Examples + -------- + >>> import numpy.array_api as anp + >>> from cyclops.evaluate.metrics.experimental.functional import multilabel_mcc + >>> target = anp.asarray([[0, 1, 0], [1, 0, 1]]) + >>> preds = anp.asarray([[0, 0, 1], [1, 0, 1]]) + >>> multilabel_mcc(target, preds, num_labels=3) + Array(0.33333334, dtype=float32) + >>> target = anp.asarray([[0, 1, 0], [1, 0, 1]]) + >>> preds = anp.asarray([[0.11, 0.22, 0.84], [0.73, 0.33, 0.92]]) + >>> multilabel_mcc(target, preds, num_labels=3) + Array(0.33333334, dtype=float32) + + """ + _multilabel_confusion_matrix_validate_args( + num_labels, + threshold=threshold, + normalize=None, + ignore_index=ignore_index, + ) + xp = _multilabel_confusion_matrix_validate_arrays( + target, + preds, + num_labels, + ignore_index=ignore_index, + ) + + target, preds = _multilabel_confusion_matrix_format_arrays( + target, + preds, + threshold=threshold, + ignore_index=ignore_index, + xp=xp, + ) + tn, fp, fn, tp = _multilabel_confusion_matrix_update_state(target, preds, xp=xp) + + confmat = _multilabel_confusion_matrix_compute( + tn, + fp, + fn, + tp, + num_labels, + normalize=None, + ) + return _mcc_reduce(confmat) diff --git a/cyclops/evaluate/metrics/experimental/matthews_corr_coef.py b/cyclops/evaluate/metrics/experimental/matthews_corr_coef.py new file mode 100644 index 000000000..ce9c75c20 --- /dev/null +++ b/cyclops/evaluate/metrics/experimental/matthews_corr_coef.py @@ -0,0 +1,192 @@ +"""Matthews Correlation Coefficient (MCC) metric.""" + +from typing import Any, Optional, Tuple, Union + +from cyclops.evaluate.metrics.experimental.confusion_matrix import ( + 
BinaryConfusionMatrix, + MulticlassConfusionMatrix, + MultilabelConfusionMatrix, +) +from cyclops.evaluate.metrics.experimental.functional.confusion_matrix import ( + _binary_confusion_matrix_compute, + _multilabel_confusion_matrix_compute, +) +from cyclops.evaluate.metrics.experimental.functional.matthews_corr_coef import ( + _mcc_reduce, +) +from cyclops.evaluate.metrics.experimental.utils.types import Array + + +class BinaryMCC(BinaryConfusionMatrix, registry_key="binary_mcc"): + """A measure of the agreement between predicted and actual values. + + Parameters + ---------- + threshold : float, default=0.5 + The threshold value to use when binarizing the inputs. + ignore_index : int, optional, default=None + Specifies a target value that is ignored and does not contribute to the + metric. If `None`, all values are used. + **kwargs : Any + Additional keyword arguments common to all metrics. + + Examples + -------- + >>> import numpy.array_api as anp + >>> from cyclops.evaluate.metrics.experimental import BinaryMCC + >>> target = anp.asarray([0, 1, 0, 1, 0, 1]) + >>> preds = anp.asarray([0, 0, 1, 1, 0, 1]) + >>> metric = BinaryMCC() + >>> metric(target, preds) + Array(0.33333334, dtype=float32) + >>> target = anp.asarray([0, 1, 0, 1, 0, 1]) + >>> preds = anp.asarray([0.11, 0.22, 0.84, 0.73, 0.33, 0.92]) + >>> metric = BinaryMCC() + >>> metric(target, preds) + Array(0.33333334, dtype=float32) + + """ + + name: str = "Matthews Correlation Coefficient" + + def __init__( + self, + threshold: float = 0.5, + ignore_index: Optional[int] = None, + **kwargs: Any, + ) -> None: + """Initialize the class.""" + super().__init__(threshold, normalize=None, ignore_index=ignore_index, **kwargs) + + def _compute_metric(self) -> Array: + """Compute the Matthews correlation coefficient.""" + tn, fp, fn, tp = self._final_state() + confmat = _binary_confusion_matrix_compute( + tp=tp, + fp=fp, + tn=tn, + fn=fn, + normalize=self.normalize, + ) + return _mcc_reduce(confmat) + + +class 
MulticlassMCC(MulticlassConfusionMatrix, registry_key="multiclass_mcc"): + """A measure of the agreement between predicted and actual values. + + Parameters + ---------- + num_classes : int + The number of classes. + ignore_index : int, Tuple[int], optional, default=None + Specifies a target value that is ignored and does not contribute to the + metric. If `None`, all values are used. + **kwargs : Any + Additional keyword arguments common to all metrics. + + Examples + -------- + >>> import numpy.array_api as anp + >>> from cyclops.evaluate.metrics.experimental import MulticlassMCC + >>> target = anp.asarray([2, 1, 0, 0]) + >>> preds = anp.asarray([2, 1, 0, 1]) + >>> metric = MulticlassMCC(num_classes=3) + >>> metric(target, preds) + Array(0.7, dtype=float32) + >>> target = anp.asarray([2, 1, 0, 0]) + >>> preds = anp.asarray( + ... [ + ... [0.16, 0.26, 0.58], + ... [0.22, 0.61, 0.17], + ... [0.71, 0.09, 0.20], + ... [0.05, 0.82, 0.13], + ... ] + ... ) + >>> metric = MulticlassMCC(num_classes=3) + >>> metric(target, preds) + Array(0.7, dtype=float32) + """ + + name: str = "Matthews Correlation Coefficient" + + def __init__( + self, + num_classes: int, + ignore_index: Optional[Union[int, Tuple[int]]] = None, + **kwargs: Any, + ) -> None: + """Initialize the class.""" + super().__init__( + num_classes=num_classes, + normalize=None, + ignore_index=ignore_index, + **kwargs, + ) + + def _compute_metric(self) -> Array: + """Compute the Matthews correlation coefficient.""" + return _mcc_reduce(self.confmat) # type: ignore + + +class MultilabelMCC(MultilabelConfusionMatrix, registry_key="multilabel_mcc"): + """A measure of the agreement between predicted and actual values. + + Parameters + ---------- + num_labels : int + The number of labels. + threshold : float, default=0.5 + The threshold value to use when binarizing the inputs. + ignore_index : int, optional, default=None + Specifies a target value that is ignored and does not contribute to the + metric. If `None`, all values are used. 
+ **kwargs : Any + Additional keyword arguments common to all metrics. + + Examples + -------- + >>> import numpy.array_api as anp + >>> from cyclops.evaluate.metrics.experimental import MultilabelMCC + >>> target = anp.asarray([[0, 1, 0], [1, 0, 1]]) + >>> preds = anp.asarray([[0, 0, 1], [1, 0, 1]]) + >>> metric = MultilabelMCC(num_labels=3) + >>> metric(target, preds) + Array(0.33333334, dtype=float32) + >>> target = anp.asarray([[0, 1, 0], [1, 0, 1]]) + >>> preds = anp.asarray([[0.11, 0.22, 0.84], [0.73, 0.33, 0.92]]) + >>> metric = MultilabelMCC(num_labels=3) + >>> metric(target, preds) + Array(0.33333334, dtype=float32) + + """ + + name: str = "Matthews Correlation Coefficient" + + def __init__( + self, + num_labels: int, + threshold: float = 0.5, + ignore_index: Optional[int] = None, + **kwargs: Any, + ) -> None: + """Initialize the class.""" + super().__init__( + num_labels=num_labels, + threshold=threshold, + normalize=None, + ignore_index=ignore_index, + **kwargs, + ) + + def _compute_metric(self) -> Array: + """Compute the Matthews correlation coefficient.""" + tn, fp, fn, tp = self._final_state() + confmat = _multilabel_confusion_matrix_compute( + tp=tp, + fp=fp, + tn=tn, + fn=fn, + num_labels=self.num_labels, + normalize=self.normalize, + ) + return _mcc_reduce(confmat) diff --git a/cyclops/utils/index.py b/cyclops/utils/index.py index 2e3941a02..3b281ec7a 100644 --- a/cyclops/utils/index.py +++ b/cyclops/utils/index.py @@ -3,6 +3,7 @@ from typing import Any, List, Optional, Sequence, Tuple, Union import numpy as np +import numpy.typing as npt def index_axis(ind: int, axis: int, shape: Tuple[int, ...]) -> Tuple[Any, ...]: @@ -33,9 +34,9 @@ def index_axis(ind: int, axis: int, shape: Tuple[int, ...]) -> Tuple[Any, ...]: def take_indices( - data: np.typing.NDArray[Any], - indexes: Sequence[Optional[Union[Sequence[int], np.typing.NDArray[Any]]]], -) -> np.typing.NDArray[Any]: + data: npt.NDArray[Any], + indexes: Sequence[Optional[Union[Sequence[int], npt.NDArray[Any]]]], 
+) -> npt.NDArray[Any]: """Index array by specifying the indices to take on each axis. Parameters @@ -69,10 +70,10 @@ def take_indices( def take_indices_over_axis( - data: np.typing.NDArray[Any], + data: npt.NDArray[Any], axis: int, - index: Union[np.typing.NDArray[Any], Sequence[int]], -) -> np.typing.NDArray[Any]: + index: Union[npt.NDArray[Any], Sequence[int]], +) -> npt.NDArray[Any]: """Take indices along an axis. Parameters diff --git a/tests/cyclops/evaluate/metrics/experimental/inputs.py b/tests/cyclops/evaluate/metrics/experimental/inputs.py index 92af7b9e6..d38d1d852 100644 --- a/tests/cyclops/evaluate/metrics/experimental/inputs.py +++ b/tests/cyclops/evaluate/metrics/experimental/inputs.py @@ -1,4 +1,5 @@ """Input data for tests of metrics in cyclops/evaluate/metrics/experimental.""" + import random from collections import namedtuple from types import ModuleType @@ -296,43 +297,46 @@ def _multilabel_cases(*, xp: Any): return ( pytest.param( InputSpec( - target=xp.asarray(_multilabel_labels), - preds=xp.asarray(_multilabel_preds), + target=xp.asarray(_multilabel_labels, dtype=xp.int32), + preds=xp.asarray(_multilabel_preds, dtype=xp.int32), ), id="input[2d-labels]", ), pytest.param( InputSpec( - target=xp.asarray(_multilabel_labels_multidim), - preds=xp.asarray(_multilabel_preds_multidim), + target=xp.asarray(_multilabel_labels_multidim, dtype=xp.int32), + preds=xp.asarray(_multilabel_preds_multidim, dtype=xp.int32), ), id="input[multidim-labels]", ), pytest.param( InputSpec( - target=xp.asarray(_multilabel_labels), - preds=xp.asarray(_multilabel_probs), + target=xp.asarray(_multilabel_labels, dtype=xp.int32), + preds=xp.asarray(_multilabel_probs, dtype=xp.float32), ), id="input[2d-probs]", ), pytest.param( InputSpec( - target=xp.asarray(_multilabel_labels), - preds=xp.asarray(_inv_sigmoid(_multilabel_probs)), + target=xp.asarray(_multilabel_labels, dtype=xp.int32), + preds=xp.asarray(_inv_sigmoid(_multilabel_probs), dtype=xp.float32), ), 
id="input[2d-logits]", ), pytest.param( InputSpec( - target=xp.asarray(_multilabel_labels_multidim), - preds=xp.asarray(_multilabel_probs_multidim), + target=xp.asarray(_multilabel_labels_multidim, dtype=xp.int32), + preds=xp.asarray(_multilabel_probs_multidim, dtype=xp.float32), ), id="input[multidim-probs]", ), pytest.param( InputSpec( - target=xp.asarray(_multilabel_labels_multidim), - preds=xp.asarray(_inv_sigmoid(_multilabel_probs_multidim)), + target=xp.asarray(_multilabel_labels_multidim, dtype=xp.int32), + preds=xp.asarray( + _inv_sigmoid(_multilabel_probs_multidim), + dtype=xp.float32, + ), ), id="input[multidim-logits]", ), diff --git a/tests/cyclops/evaluate/metrics/experimental/test_confusion_matrix.py b/tests/cyclops/evaluate/metrics/experimental/test_confusion_matrix.py index 2d94c03fa..860bf4ba3 100644 --- a/tests/cyclops/evaluate/metrics/experimental/test_confusion_matrix.py +++ b/tests/cyclops/evaluate/metrics/experimental/test_confusion_matrix.py @@ -1,4 +1,5 @@ """Test confusion matrix metrics.""" + from functools import partial import array_api_compat as apc @@ -390,7 +391,7 @@ def test_multilabel_confusion_matrix_class_with_numpy_array_api_arrays( ) @pytest.mark.integration_test() # machine for integration tests has GPU - @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=array_api_compat.torch)) @pytest.mark.parametrize("normalize", [None, "true", "pred", "all"]) @pytest.mark.parametrize("ignore_index", [None, 0, -1]) def test_multilabel_confusion_matrix_class_with_torch_tensors( @@ -405,6 +406,8 @@ def test_multilabel_confusion_matrix_class_with_torch_tensors( if ignore_index is not None: target = _inject_ignore_index(target, ignore_index) + device = "cuda" if torch.cuda.is_available() else "cpu" + self.run_metric_class_implementation_test( target, preds, @@ -421,4 +424,6 @@ def test_multilabel_confusion_matrix_class_with_torch_tensors( "normalize": normalize, 
"ignore_index": ignore_index, }, + device=device, + use_device_for_ref=True, ) diff --git a/tests/cyclops/evaluate/metrics/experimental/test_f_score.py b/tests/cyclops/evaluate/metrics/experimental/test_f_score.py index 5c9afe687..73f83dda5 100644 --- a/tests/cyclops/evaluate/metrics/experimental/test_f_score.py +++ b/tests/cyclops/evaluate/metrics/experimental/test_f_score.py @@ -1,4 +1,5 @@ """Tests for the F-score metric.""" + from functools import partial from typing import Literal, Optional @@ -79,9 +80,11 @@ def test_binary_fbeta_score_function_with_numpy_array_api_arrays( self.run_metric_function_implementation_test( target, preds, - metric_function=binary_f1_score - if beta == 1.0 - else partial(binary_fbeta_score, beta=beta), + metric_function=( + binary_f1_score + if beta == 1.0 + else partial(binary_fbeta_score, beta=beta) + ), metric_args={"threshold": THRESHOLD, "ignore_index": ignore_index}, reference_metric=partial( _binary_fbeta_score_reference, @@ -119,9 +122,9 @@ def test_binary_fbeta_score_class_with_numpy_array_api_arrays( self.run_metric_class_implementation_test( target, preds, - metric_class=BinaryF1Score - if beta == 1.0 - else partial(BinaryFBetaScore, beta=beta), + metric_class=( + BinaryF1Score if beta == 1.0 else partial(BinaryFBetaScore, beta=beta) + ), metric_args={"threshold": THRESHOLD, "ignore_index": ignore_index}, reference_metric=partial( _binary_fbeta_score_reference, @@ -162,9 +165,9 @@ def test_binary_fbeta_class_with_torch_tensors( self.run_metric_class_implementation_test( target, preds, - metric_class=BinaryF1Score - if beta == 1.0 - else partial(BinaryFBetaScore, beta=beta), + metric_class=( + BinaryF1Score if beta == 1.0 else partial(BinaryFBetaScore, beta=beta) + ), metric_args={"threshold": THRESHOLD, "ignore_index": ignore_index}, reference_metric=partial( _binary_fbeta_score_reference, @@ -241,9 +244,11 @@ def test_multiclass_fbeta_score_with_numpy_array_api_arrays( self.run_metric_function_implementation_test( 
target, preds, - metric_function=multiclass_f1_score - if beta == 1.0 - else partial(multiclass_fbeta_score, beta=beta), + metric_function=( + multiclass_f1_score + if beta == 1.0 + else partial(multiclass_fbeta_score, beta=beta) + ), metric_args={ "num_classes": NUM_CLASSES, "top_k": top_k, @@ -292,9 +297,11 @@ def test_multiclass_fbeta_score_class_with_numpy_array_api_arrays( self.run_metric_class_implementation_test( target, preds, - metric_class=MulticlassF1Score - if beta == 1.0 - else partial(MulticlassFBetaScore, beta=beta), + metric_class=( + MulticlassF1Score + if beta == 1.0 + else partial(MulticlassFBetaScore, beta=beta) + ), reference_metric=partial( _multiclass_fbeta_score_reference, beta=beta, @@ -346,9 +353,11 @@ def test_multiclass_fbeta_score_class_with_torch_tensors( self.run_metric_class_implementation_test( target, preds, - metric_class=MulticlassF1Score - if beta == 1.0 - else partial(MulticlassFBetaScore, beta=beta), + metric_class=( + MulticlassF1Score + if beta == 1.0 + else partial(MulticlassFBetaScore, beta=beta) + ), reference_metric=partial( _multiclass_fbeta_score_reference, beta=beta, @@ -411,9 +420,11 @@ def test_multilabel_fbeta_score_with_numpy_array_api_arrays( self.run_metric_function_implementation_test( target, preds, - metric_function=multilabel_f1_score - if beta == 1.0 - else partial(multilabel_fbeta_score, beta=beta), + metric_function=( + multilabel_f1_score + if beta == 1.0 + else partial(multilabel_fbeta_score, beta=beta) + ), reference_metric=partial( _multilabel_fbeta_score_reference, beta=beta, @@ -446,9 +457,11 @@ def test_multilabel_fbeta_score_class_with_numpy_array_api_arrays( self.run_metric_class_implementation_test( target, preds, - metric_class=MultilabelF1Score - if beta == 1.0 - else partial(MultilabelFBetaScore, beta=beta), + metric_class=( + MultilabelF1Score + if beta == 1.0 + else partial(MultilabelFBetaScore, beta=beta) + ), reference_metric=partial( _multilabel_fbeta_score_reference, beta=beta, @@ 
-466,7 +479,7 @@ def test_multilabel_fbeta_score_class_with_numpy_array_api_arrays( ) @pytest.mark.integration_test() # machine for integration tests has GPU - @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=array_api_compat.torch)) @pytest.mark.parametrize("average", [None, "micro", "macro", "weighted"]) @pytest.mark.parametrize("ignore_index", [None, 0, -1]) def test_multilabel_fbeta_score_class_with_torch_tensors( @@ -479,12 +492,19 @@ def test_multilabel_fbeta_score_class_with_torch_tensors( """Test class for multilabel fbeta score with torch tensors.""" target, preds = inputs + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + device = "cuda" if torch.cuda.is_available() else "cpu" + self.run_metric_class_implementation_test( target, preds, - metric_class=MultilabelF1Score - if beta == 1.0 - else partial(MultilabelFBetaScore, beta=beta), + metric_class=( + MultilabelF1Score + if beta == 1.0 + else partial(MultilabelFBetaScore, beta=beta) + ), reference_metric=partial( _multilabel_fbeta_score_reference, beta=beta, @@ -499,6 +519,8 @@ def test_multilabel_fbeta_score_class_with_torch_tensors( "average": average, "ignore_index": ignore_index, }, + device=device, + use_device_for_ref=True, ) diff --git a/tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py b/tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py new file mode 100644 index 000000000..44aecbdbf --- /dev/null +++ b/tests/cyclops/evaluate/metrics/experimental/test_matthews_corr_coef.py @@ -0,0 +1,385 @@ +"""Test matthews correlation coefficient metrics.""" + +from functools import partial + +import array_api_compat as apc +import array_api_compat.torch +import numpy.array_api as anp +import pytest +import torch.utils.dlpack +from torchmetrics.functional.classification import ( + binary_matthews_corrcoef, + multiclass_matthews_corrcoef, + 
multilabel_matthews_corrcoef, +) + +from cyclops.evaluate.metrics.experimental.functional.matthews_corr_coef import ( + binary_mcc, + multiclass_mcc, + multilabel_mcc, +) +from cyclops.evaluate.metrics.experimental.matthews_corr_coef import ( + BinaryMCC, + MulticlassMCC, + MultilabelMCC, +) +from cyclops.evaluate.metrics.experimental.utils.ops import to_int +from cyclops.evaluate.metrics.experimental.utils.validation import is_floating_point + +from ..conftest import NUM_CLASSES, NUM_LABELS, THRESHOLD +from .inputs import _binary_cases, _multiclass_cases, _multilabel_cases +from .testers import MetricTester, _inject_ignore_index + + +def _binary_mcc_reference( + target, + preds, + threshold, + ignore_index, +) -> torch.Tensor: + """Return the reference binary matthews correlation coefficient.""" + return binary_matthews_corrcoef( + torch.utils.dlpack.from_dlpack(preds), + torch.utils.dlpack.from_dlpack(target), + threshold=threshold, + ignore_index=ignore_index, + ) + + +class TestBinaryMCC(MetricTester): + """Test binary matthews correlation coefficient function and class.""" + + @pytest.mark.parametrize("inputs", _binary_cases(xp=anp)) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_binary_mcc_function_with_numpy_array_api_arrays( + self, + inputs, + ignore_index, + ) -> None: + """Test function for binary matthews corrcoef using numpy.array_api arrays.""" + target, preds = inputs + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + self.run_metric_function_implementation_test( + target, + preds, + metric_function=binary_mcc, + metric_args={ + "threshold": THRESHOLD, + "ignore_index": ignore_index, + }, + reference_metric=partial( + _binary_mcc_reference, + threshold=THRESHOLD, + ignore_index=ignore_index, + ), + ) + + @pytest.mark.parametrize("inputs", _binary_cases(xp=anp)) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_binary_mcc_class_with_numpy_array_api_arrays( + self, + 
inputs, + ignore_index, + ) -> None: + """Test class for binary matthews correlation coefficient.""" + target, preds = inputs + + if ( + preds.ndim == 1 + and is_floating_point(preds) + and not anp.all(to_int((preds >= 0)) * to_int((preds <= 1))) + ): + pytest.skip( + "When using 0-D logits, batch result will be different from local " + "result because the `sigmoid` operation may not be applied to each " + "batch (some values may be in [0, 1] and some may not).", + ) + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + self.run_metric_class_implementation_test( + target, + preds, + metric_class=BinaryMCC, + metric_args={ + "threshold": THRESHOLD, + "ignore_index": ignore_index, + }, + reference_metric=partial( + _binary_mcc_reference, + threshold=THRESHOLD, + ignore_index=ignore_index, + ), + ) + + @pytest.mark.integration_test() # machine for integration tests has GPU + @pytest.mark.parametrize("inputs", _binary_cases(xp=array_api_compat.torch)) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_binary_mcc_class_with_torch_tensors( + self, + inputs, + ignore_index, + ) -> None: + """Test binary matthews correlation coefficient class with torch tensors.""" + target, preds = inputs + + if ( + preds.ndim == 1 + and is_floating_point(preds) + and not torch.all(to_int((preds >= 0)) * to_int((preds <= 1))) + ): + pytest.skip( + "When using 0-D logits, batch result will be different from local " + "result because the `sigmoid` operation may not be applied to each " + "batch (some values may be in [0, 1] and some may not).", + ) + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + device = "cuda" if torch.cuda.is_available() else "cpu" + + self.run_metric_class_implementation_test( + target, + preds, + metric_class=BinaryMCC, + metric_args={ + "threshold": THRESHOLD, + "ignore_index": ignore_index, + }, + reference_metric=partial( + _binary_mcc_reference, + threshold=THRESHOLD, + 
ignore_index=ignore_index, + ), + device=device, + use_device_for_ref=True, + ) + + +def _multiclass_mcc_reference( + target, + preds, + num_classes=NUM_CLASSES, + ignore_index=None, +) -> torch.Tensor: + """Return the reference multiclass matthews correlation coefficient.""" + if preds.ndim == 1 and is_floating_point(preds): + xp = apc.array_namespace(preds) + preds = xp.argmax(preds, axis=0) + + return multiclass_matthews_corrcoef( + torch.utils.dlpack.from_dlpack(preds), + torch.utils.dlpack.from_dlpack(target), + num_classes, + ignore_index=ignore_index, + ) + + +class TestMulticlassMCC(MetricTester): + """Test multiclass matthews correlation coefficient function and class.""" + + @pytest.mark.parametrize("inputs", _multiclass_cases(xp=anp)) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_multiclass_mcc_with_numpy_array_api_arrays( + self, + inputs, + ignore_index, + ) -> None: + """Test function for multiclass matthews correlation coefficient.""" + target, preds = inputs + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + self.run_metric_function_implementation_test( + target, + preds, + metric_function=multiclass_mcc, + metric_args={ + "num_classes": NUM_CLASSES, + "ignore_index": ignore_index, + }, + reference_metric=partial( + _multiclass_mcc_reference, + ignore_index=ignore_index, + ), + ) + + @pytest.mark.parametrize("inputs", _multiclass_cases(xp=anp)) + @pytest.mark.parametrize("ignore_index", [None, 1, -1]) + def test_multiclass_mcc_class_with_numpy_array_api_arrays( + self, + inputs, + ignore_index, + ) -> None: + """Test class for multiclass matthews correlation coefficient.""" + target, preds = inputs + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + self.run_metric_class_implementation_test( + target, + preds, + metric_class=MulticlassMCC, + reference_metric=partial( + _multiclass_mcc_reference, + ignore_index=ignore_index, + ), + metric_args={ + 
"num_classes": NUM_CLASSES, + "ignore_index": ignore_index, + }, + ) + + @pytest.mark.integration_test() # machine for integration tests has GPU + @pytest.mark.parametrize("inputs", _multiclass_cases(xp=array_api_compat.torch)) + @pytest.mark.parametrize("ignore_index", [None, 1, -1]) + def test_multiclass_mcc_class_with_torch_tensors( + self, + inputs, + ignore_index, + ) -> None: + """Test class for multiclass matthews correlation coefficient.""" + target, preds = inputs + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + device = "cuda" if torch.cuda.is_available() else "cpu" + + self.run_metric_class_implementation_test( + target, + preds, + metric_class=MulticlassMCC, + reference_metric=partial( + _multiclass_mcc_reference, + ignore_index=ignore_index, + ), + metric_args={ + "num_classes": NUM_CLASSES, + "ignore_index": ignore_index, + }, + device=device, + use_device_for_ref=True, + ) + + +def _multilabel_mcc_reference( + target, + preds, + threshold, + num_labels=NUM_LABELS, + ignore_index=None, +) -> torch.Tensor: + """Return the reference multilabel matthews correlation coefficient.""" + return multilabel_matthews_corrcoef( + torch.utils.dlpack.from_dlpack(preds), + torch.utils.dlpack.from_dlpack(target), + num_labels, + threshold=threshold, + ignore_index=ignore_index, + ) + + +class TestMultilabelMCC(MetricTester): + """Test multilabel matthews correlation coefficient function and class.""" + + atol: float = 4e-8 + + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_multilabel_mcc_with_numpy_array_api_arrays( + self, + inputs, + ignore_index, + ) -> None: + """Test function for multilabel matthews correlation coefficient.""" + target, preds = inputs + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + self.run_metric_function_implementation_test( + target, + preds, + metric_function=multilabel_mcc, 
+ reference_metric=partial( + _multilabel_mcc_reference, + threshold=THRESHOLD, + ignore_index=ignore_index, + ), + metric_args={ + "threshold": THRESHOLD, + "num_labels": NUM_LABELS, + "ignore_index": ignore_index, + }, + ) + + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_multilabel_mcc_class_with_numpy_array_api_arrays( + self, + inputs, + ignore_index, + ) -> None: + """Test class for multilabel matthews correlation coefficient.""" + target, preds = inputs + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + self.run_metric_class_implementation_test( + target, + preds, + metric_class=MultilabelMCC, + reference_metric=partial( + _multilabel_mcc_reference, + threshold=THRESHOLD, + ignore_index=ignore_index, + ), + metric_args={ + "threshold": THRESHOLD, + "num_labels": NUM_LABELS, + "ignore_index": ignore_index, + }, + ) + + @pytest.mark.integration_test() # machine for integration tests has GPU + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=array_api_compat.torch)) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_multilabel_mcc_class_with_torch_tensors( + self, + inputs, + ignore_index, + ) -> None: + """Test class for multilabel matthews correlation coefficient.""" + target, preds = inputs + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + device = "cuda" if torch.cuda.is_available() else "cpu" + + self.run_metric_class_implementation_test( + target, + preds, + metric_class=MultilabelMCC, + reference_metric=partial( + _multilabel_mcc_reference, + threshold=THRESHOLD, + ignore_index=ignore_index, + ), + metric_args={ + "threshold": THRESHOLD, + "num_labels": NUM_LABELS, + "ignore_index": ignore_index, + }, + device=device, + use_device_for_ref=True, + ) diff --git a/tests/cyclops/evaluate/metrics/experimental/test_negative_predicitve_value.py 
b/tests/cyclops/evaluate/metrics/experimental/test_negative_predicitve_value.py index b07f2e7ea..6d0d057be 100644 --- a/tests/cyclops/evaluate/metrics/experimental/test_negative_predicitve_value.py +++ b/tests/cyclops/evaluate/metrics/experimental/test_negative_predicitve_value.py @@ -1,4 +1,5 @@ """Test negative predictive value.""" + from functools import partial from typing import Literal, Optional @@ -476,7 +477,7 @@ def test_multilabel_npv_class_with_numpy_array_api_arrays( ) @pytest.mark.integration_test() # machine for integration tests has GPU - @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=array_api_compat.torch)) @pytest.mark.parametrize("average", [None, "micro", "macro", "weighted"]) @pytest.mark.parametrize("ignore_index", [None, 0, -1]) def test_multilabel_npv_class_with_torch_tensors( @@ -488,6 +489,11 @@ def test_multilabel_npv_class_with_torch_tensors( """Test class for multilabel negative predictive value with torch tensors.""" target, preds = inputs + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + device = "cuda" if torch.cuda.is_available() else "cpu" + self.run_metric_class_implementation_test( target, preds, @@ -505,6 +511,8 @@ def test_multilabel_npv_class_with_torch_tensors( "average": average, "ignore_index": ignore_index, }, + device=device, + use_device_for_ref=True, ) diff --git a/tests/cyclops/evaluate/metrics/experimental/test_precision_recall.py b/tests/cyclops/evaluate/metrics/experimental/test_precision_recall.py index 14c3c3a96..8b24b2d75 100644 --- a/tests/cyclops/evaluate/metrics/experimental/test_precision_recall.py +++ b/tests/cyclops/evaluate/metrics/experimental/test_precision_recall.py @@ -1,4 +1,5 @@ """Test precision recall metrics.""" + from functools import partial from typing import Literal, Optional @@ -730,7 +731,7 @@ def test_multilabel_precision_class_with_numpy_array_api_arrays( ) 
@pytest.mark.integration_test() # machine for integration tests has GPU - @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=array_api_compat.torch)) @pytest.mark.parametrize("average", [None, "micro", "macro", "weighted"]) @pytest.mark.parametrize("ignore_index", [None, 0, -1]) def test_multilabel_precision_class_with_torch_tensors( @@ -742,6 +743,11 @@ def test_multilabel_precision_class_with_torch_tensors( """Test class for multilabel precision with torch tensors.""" target, preds = inputs + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + device = "cuda" if torch.cuda.is_available() else "cpu" + self.run_metric_class_implementation_test( target, preds, @@ -760,6 +766,8 @@ def test_multilabel_precision_class_with_torch_tensors( "average": average, "ignore_index": ignore_index, }, + device=device, + use_device_for_ref=True, ) diff --git a/tests/cyclops/evaluate/metrics/experimental/test_precision_recall_curve.py b/tests/cyclops/evaluate/metrics/experimental/test_precision_recall_curve.py index 4dc5989fd..081ebd1e9 100644 --- a/tests/cyclops/evaluate/metrics/experimental/test_precision_recall_curve.py +++ b/tests/cyclops/evaluate/metrics/experimental/test_precision_recall_curve.py @@ -1,4 +1,5 @@ """Test precision-recall curve metric.""" + from functools import partial from typing import List, Tuple, Union @@ -45,9 +46,11 @@ def _binary_precision_recall_curve_reference( return tm_binary_precision_recall_curve( torch.utils.dlpack.from_dlpack(preds), torch.utils.dlpack.from_dlpack(target), - thresholds=torch.utils.dlpack.from_dlpack(thresholds) - if apc.is_array_api_obj(thresholds) - else thresholds, + thresholds=( + torch.utils.dlpack.from_dlpack(thresholds) + if apc.is_array_api_obj(thresholds) + else thresholds + ), ignore_index=ignore_index, ) @@ -215,9 +218,11 @@ def _multiclass_precision_recall_curve_reference( torch.utils.dlpack.from_dlpack(preds), 
torch.utils.dlpack.from_dlpack(target), num_classes, - thresholds=torch.utils.dlpack.from_dlpack(thresholds) - if apc.is_array_api_obj(thresholds) - else thresholds, + thresholds=( + torch.utils.dlpack.from_dlpack(thresholds) + if apc.is_array_api_obj(thresholds) + else thresholds + ), ignore_index=ignore_index, ) @@ -371,9 +376,11 @@ def _multilabel_precision_recall_curve_reference( torch.utils.dlpack.from_dlpack(preds), torch.utils.dlpack.from_dlpack(target), num_labels, - thresholds=torch.utils.dlpack.from_dlpack(thresholds) - if apc.is_array_api_obj(thresholds) - else thresholds, + thresholds=( + torch.utils.dlpack.from_dlpack(thresholds) + if apc.is_array_api_obj(thresholds) + else thresholds + ), ignore_index=ignore_index, ) @@ -381,6 +388,8 @@ def _multilabel_precision_recall_curve_reference( class TestMultilabelPrecisionRecallCurve(MetricTester): """Test multilabel precision-recall curve function and class.""" + atol: float = 2e-7 + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)[2:]) @pytest.mark.parametrize("thresholds", _thresholds(xp=anp)) @pytest.mark.parametrize("ignore_index", [None, 0, -1]) diff --git a/tests/cyclops/evaluate/metrics/experimental/test_roc.py b/tests/cyclops/evaluate/metrics/experimental/test_roc.py index ddc4f9556..17a4fff5a 100644 --- a/tests/cyclops/evaluate/metrics/experimental/test_roc.py +++ b/tests/cyclops/evaluate/metrics/experimental/test_roc.py @@ -1,4 +1,5 @@ """Test roc curve metric.""" + from functools import partial from typing import List, Tuple, Union @@ -45,9 +46,11 @@ def _binary_roc_reference( return tm_binary_roc( torch.utils.dlpack.from_dlpack(preds), torch.utils.dlpack.from_dlpack(target), - thresholds=torch.utils.dlpack.from_dlpack(thresholds) - if apc.is_array_api_obj(thresholds) - else thresholds, + thresholds=( + torch.utils.dlpack.from_dlpack(thresholds) + if apc.is_array_api_obj(thresholds) + else thresholds + ), ignore_index=ignore_index, ) @@ -215,9 +218,11 @@ def _multiclass_roc_reference( 
torch.utils.dlpack.from_dlpack(preds), torch.utils.dlpack.from_dlpack(target), num_classes, - thresholds=torch.utils.dlpack.from_dlpack(thresholds) - if apc.is_array_api_obj(thresholds) - else thresholds, + thresholds=( + torch.utils.dlpack.from_dlpack(thresholds) + if apc.is_array_api_obj(thresholds) + else thresholds + ), ignore_index=ignore_index, ) @@ -371,9 +376,11 @@ def _multilabel_roc_reference( torch.utils.dlpack.from_dlpack(preds), torch.utils.dlpack.from_dlpack(target), num_labels, - thresholds=torch.utils.dlpack.from_dlpack(thresholds) - if apc.is_array_api_obj(thresholds) - else thresholds, + thresholds=( + torch.utils.dlpack.from_dlpack(thresholds) + if apc.is_array_api_obj(thresholds) + else thresholds + ), ignore_index=ignore_index, ) @@ -381,6 +388,8 @@ def _multilabel_roc_reference( class TestMultilabelROC(MetricTester): """Test multilabel roc curve function and class.""" + atol: float = 9e-8 + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)[2:]) @pytest.mark.parametrize("thresholds", _thresholds(xp=anp)) @pytest.mark.parametrize("ignore_index", [None, 0, -1]) diff --git a/tests/cyclops/evaluate/metrics/experimental/test_specificity.py b/tests/cyclops/evaluate/metrics/experimental/test_specificity.py index 035edbada..b4f40b12c 100644 --- a/tests/cyclops/evaluate/metrics/experimental/test_specificity.py +++ b/tests/cyclops/evaluate/metrics/experimental/test_specificity.py @@ -1,4 +1,5 @@ """Test specificity.""" + from functools import partial from typing import Literal, Optional @@ -417,7 +418,7 @@ def test_multilabel_specificity_class_with_numpy_array_api_arrays( ) @pytest.mark.integration_test() # machine for integration tests has GPU - @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=array_api_compat.torch)) @pytest.mark.parametrize("average", [None, "micro", "macro", "weighted"]) @pytest.mark.parametrize("ignore_index", [None, 0, -1]) def 
test_multilabel_specificity_class_with_torch_tensors( @@ -429,6 +430,11 @@ def test_multilabel_specificity_class_with_torch_tensors( """Test class for multilabel specificity with torch tensors.""" target, preds = inputs + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + device = "cuda" if torch.cuda.is_available() else "cpu" + self.run_metric_class_implementation_test( target, preds, @@ -446,6 +452,8 @@ def test_multilabel_specificity_class_with_torch_tensors( "average": average, "ignore_index": ignore_index, }, + device=device, + use_device_for_ref=True, ) diff --git a/tests/cyclops/evaluate/metrics/experimental/testers.py b/tests/cyclops/evaluate/metrics/experimental/testers.py index 4ae8775dc..4d58305d3 100644 --- a/tests/cyclops/evaluate/metrics/experimental/testers.py +++ b/tests/cyclops/evaluate/metrics/experimental/testers.py @@ -1,4 +1,5 @@ """Testers for metrics.""" + from functools import partial from typing import Any, Callable, Dict, Optional, Sequence, Type @@ -19,8 +20,20 @@ def _assert_allclose( """Recursively assert that two results are within a certain tolerance.""" if apc.is_array_api_obj(cyclops_result) and apc.is_array_api_obj(ref_result): # move to cpu and convert to numpy - cyclops_result = np.from_dlpack(apc.to_device(cyclops_result, "cpu")) - ref_result = np.from_dlpack(apc.to_device(ref_result, "cpu")) + cyclops_result = np.from_dlpack( + ( + apc.to_device(cyclops_result, "cpu") + if apc.device(cyclops_result) != "cpu" + else cyclops_result + ), + ) + ref_result = np.from_dlpack( + ( + apc.to_device(ref_result, "cpu") + if apc.device(ref_result) != "cpu" + else ref_result + ), + ) np.testing.assert_allclose( cyclops_result,