diff --git a/CC/README.md b/CC/README.md
new file mode 100644
index 00000000..242d07df
--- /dev/null
+++ b/CC/README.md
@@ -0,0 +1,2 @@
+# Cognitive Computing
+
diff --git a/RE/README.md b/RE/README.md
new file mode 100644
index 00000000..b4966c38
--- /dev/null
+++ b/RE/README.md
@@ -0,0 +1,4 @@
+# Research Efficiency
+
+
+
diff --git a/RE/paddleext/CHANGELOG.md b/RE/paddleext/CHANGELOG.md
new file mode 100644
index 00000000..9aba09d6
--- /dev/null
+++ b/RE/paddleext/CHANGELOG.md
@@ -0,0 +1,21 @@
+Changelog
+===
+All notable changes to this project are recorded here. The format is based on [Keep a Changelog].
+
+This project's versioning follows [Semantic Versioning] and [PEP-440].
+
+## [v1.0] - 2022-07-04
+---
+### Added
+- Support the testing of some classification modules for paddlemetric
+### Changed
+
+
+
+
+
+[v1.0]: https://console.cloud.baidu-int.com/devops/icode/repos/baidu/ccl/torch2paddle/commits/7476c4f8477d6161f8d5aaaf78f47d6bee990d42
+
+[Keep a Changelog]: https://keepachangelog.com/zh-CN/1.0.0/
+[Semantic Versioning]: https://semver.org/lang/zh-CN/
+[PEP-440]: https://www.python.org/dev/peps/pep-0440/
diff --git a/RE/paddleext/README.md b/RE/paddleext/README.md
new file mode 100644
index 00000000..03a9162c
--- /dev/null
+++ b/RE/paddleext/README.md
@@ -0,0 +1,103 @@
+# Paddle Extension
+
+Paddle extensions, including implementations of torch APIs.
+
+## Install
+
+* Clone the repo
+* Add the path of the paddleext folder to PYTHONPATH
+
+## Document
+
+### Seamlessly switch the backend between Paddle and PyTorch
+
+* Add the following code to the root __init__.py of your project
+(assuming your project name is PROJECT):
+
+```python
+
+import importlib
+import sys
+import os
+
+BACKEND = os.environ.get('BACKEND', 'paddle')
+
+if BACKEND == "paddle":
+
+    from paddleext import torchapi
+    sys.modules["PROJECT.backend"] = torchapi
+
+    try:
+        import paddlemetrics
+        sys.modules["PROJECT.metrics"] = paddlemetrics
+    except Exception as e:
+        pass
+
+elif BACKEND == "torch":
+    try:
+        import torch
+        import types
+
+        class VirtualModule(types.ModuleType):
+            def __init__(self, module_name, sub_modules):
+
+                super().__init__(module_name)
+                try:
+                    import sys
+                    sys.modules[module_name] = self
+                    self._module_name = module_name
+                    self._sub_modules = sub_modules
+                    for sub_name, module in sub_modules.items():
+                        if sub_name is None:
+                            sys.modules[f"{module_name}"] = module
+                        else:
+                            sys.modules[f"{module_name}.{sub_name}"] = module
+                except ImportError as err:
+                    raise err  # re-raise so a failed registration is not silently swallowed
+
+            def __repr__(self):
+                return "Virtual module for " + self._module_name
+
+            def __getattr__(self, attrname):
+
+                if attrname in self._sub_modules.keys():
+                    import sys
+                    return self._sub_modules[attrname]
+                else:
+                    raise AttributeError(attrname)
+
+
+        import pkgutil
+
+        sub_modules = {None: torch}
+        for module_info in pkgutil.iter_modules(torch.__path__):
+            if not module_info.name.startswith("_"):
+                try:
+                    module = importlib.import_module("torch." + module_info.name)
+                    sub_modules[module_info.name] = module
+                except:
+                    pass
+
+        VirtualModule("PROJECT.backend", sub_modules)
+
+
+    except Exception as e:
+        raise e
+
+    try:
+        import torchmetrics
+
+        sys.modules["PROJECT.metrics"] = torchmetrics
+    except Exception as e:
+        pass
+
+```
+* Set the environment variable BACKEND to "paddle" or "torch" to switch the backend
+* Import the backend module in your code
+
+```python
+import PROJECT.backend as B
+from PROJECT.backend import nn
+import PROJECT.metrics as M
+```
+* Replace all "torch." or "paddle." prefixes with "B." in your code
\ No newline at end of file
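For example, a project module written against this shim only ever imports `PROJECT.backend`. The sketch below is illustrative only and not part of this patch; `PROJECT` and `TinyClassifier` are placeholder names:

```python
# hypothetical PROJECT/model.py -- backend-agnostic sketch (assumes the setup above)
import PROJECT.backend as B
from PROJECT.backend import nn


class TinyClassifier(nn.Module):
    def __init__(self, in_dim=16, n_classes=3):
        super().__init__()
        self.proj = nn.Linear(in_dim, n_classes)

    def forward(self, x):
        # torch-style keyword arguments; under the paddle backend the shim
        # rewrites dim= to axis= before calling into paddle
        return B.softmax(self.proj(x), dim=-1)


if __name__ == "__main__":
    model = TinyClassifier()
    probs = model(B.rand(4, 16))
    values, indices = B.max(probs, 1)  # torch-style (values, indices) pair
    print(probs.shape, indices.shape)
```

Running `BACKEND=paddle python -m PROJECT.model` or `BACKEND=torch python -m PROJECT.model` should then exercise the same code on either framework.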
diff --git a/RE/paddleext/__init__.py b/RE/paddleext/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/RE/paddleext/paddleext/__init__.py b/RE/paddleext/paddleext/__init__.py
new file mode 100644
index 00000000..7fd01739
--- /dev/null
+++ b/RE/paddleext/paddleext/__init__.py
@@ -0,0 +1,3 @@
+
+
+from . import torchapi
\ No newline at end of file
diff --git a/RE/paddleext/paddleext/torchapi/__init__.py b/RE/paddleext/paddleext/torchapi/__init__.py
new file mode 100644
index 00000000..7ba3f89a
--- /dev/null
+++ b/RE/paddleext/paddleext/torchapi/__init__.py
@@ -0,0 +1,74 @@
+import inspect
+import sys
+import types
+from functools import partial
+
+from .core import *
+from .tensor_ import *
+from .functional import *
+from . import sampler
+from . import data
+from . import nn
+from . import distributed
+from . import cuda
+from . import optim
+
+#from . import paddle_func
+
+this_module = sys.modules[__name__]
+
+
+def get_module_attribute(module, *args, **kwargs):
+    # Wrap plain functions in the kwarg-translating delegate and wrap
+    # submodules in ModuleDelegate; classes and other objects pass through.
+
+    obj = object.__getattribute__(module, *args, **kwargs)
+
+    if isinstance(obj, types.FunctionType):
+        if not obj.__module__.startswith("paddleext.torchapi."):
+            return partial(paddle_delegate_func, obj)
+        else:
+            return obj
+    elif isinstance(obj, types.ModuleType):
+        return ModuleDelegate(obj)
+    elif inspect.isclass(obj):
+        return obj
+    else:
+        return obj
+
+class ModuleDelegate(object):
+    def __init__(self, module):
+        self.module = module
+
+    def __getattribute__(self, *args, **kwargs):
+
+        module = object.__getattribute__(self, "module")
+        result = object.__getattribute__(module, *args, **kwargs)
+        if isinstance(result, types.ModuleType):
+            return ModuleDelegate(result)
+        elif isinstance(result, types.FunctionType):
+            if not result.__module__.startswith("paddleext.torchapi."):
+                return partial(paddle_delegate_func, result)
+            else:
+                return result
+        elif inspect.isclass(result):
+            if result.__module__.startswith("paddle."):
+                return make_delegate_class(result)
+            else:
+                return result
+        else:
+            return result
+
+
+    # def __getattr__(self, *args, **kwargs):
+    #     return get_module_attribute(self.module, *args, **kwargs),
+
+    # def __delattr__(self, *args, **kwargs):
+    #     return object.__delattr__(self.module, *args, **kwargs)
+    #
+    # def __dir__(self):
+    #     return dir(self.module)
+
+
+
+sys.modules[__name__] = ModuleDelegate(sys.modules[__name__])
diff --git a/RE/paddleext/paddleext/torchapi/core.py b/RE/paddleext/paddleext/torchapi/core.py
new file mode 100644
index 00000000..f7f1db0a
--- /dev/null
+++ b/RE/paddleext/paddleext/torchapi/core.py
@@ -0,0 +1,115 @@
+"""
+paddle core
+"""
+import sys
+import types
+from functools import partial
+from types import MethodType
+from typing import Any
+
+import paddle
+import random
+import numpy as np
+
+Module = paddle.nn.Layer
+ModuleBase = paddle.nn.Layer
+ModuleDict = paddle.nn.LayerDict
+ModuleList = paddle.nn.LayerList
+device = str
+
+dtype = paddle.dtype
+
+def load_state_dict(module: Module, state_dict, *args, **kwargs):
+    module.set_state_dict(state_dict, *args, **kwargs)
+
+
+Module.load_state_dict = load_state_dict
+
+from paddle import *
+
+def deterministic(seed=0):
+    random.seed(seed)
+    paddle.seed(seed)
+    np.random.seed(seed)
+
+
+import paddle
+
+from paddle import bool, int32, int64, int8, float32, float64, float16
+
+long = paddle.int64
+int = paddle.int32
+float = paddle.float32
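# Editor's note (illustrative comment, not part of the original patch): the
# aliases above and below intentionally shadow Python's builtin `int` and
# `float` inside this module so that torch-flavoured code keeps working under
# the paddle backend, e.g.
#
#     import paddleext.torchapi as B
#     ids = B.zeros(2, 3, dtype=B.long)    # paddle.int64 tensor
#     w = B.tensor(0.5, dtype=B.float)     # paddle.float32 scalar tensor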
+double = paddle.float64 + + +def platform(): + """ + + Returns: + + """ + + return "paddle" + + + +from paddle import no_grad, autograd + +class set_detect_anomaly(object): + r"""Context-manager that sets the anomaly detection for the autograd engine on or off. + ``set_detect_anomaly`` will enable or disable the autograd anomaly detection + based on its argument :attr:`mode`. + It can be used as a context-manager or as a function. + See ``detect_anomaly`` above for details of the anomaly detection behaviour. + Args: + mode (bool): Flag whether to enable anomaly detection (``True``), + or disable (``False``). + """ + + def __init__(self, mode: bool) -> None: + pass + + def __enter__(self) -> None: + pass + + def __exit__(self, *args: Any) -> None: + pass + + +setattr(autograd, "set_detect_anomaly", set_detect_anomaly) + + +def paddle_delegate_func(func, *args, **kwargs): + if "dim" in kwargs: + kwargs["axis"] = kwargs["dim"] + del kwargs["dim"] + + if "device" in kwargs: + del kwargs["device"] + + return func(*args, **kwargs) + +def make_delegate_class(class_): + + class DelegateClass(class_): + def __init__(self, *args, **kwargs): + + if class_.__name__.endswith("Linear"): + if "bias" in kwargs: + kwargs["bias_attr"] = kwargs["bias"] + del kwargs["bias"] + if "weight" in kwargs: + kwargs["weight_attr"] = kwargs["weight"] + del kwargs["weight"] + if class_.__name__.endswith("LayerNorm"): + if "eps" in kwargs: + kwargs["epsilon"] = kwargs["eps"] + del kwargs["eps"] + super().__init__(*args, **kwargs) +# self.__class__ = class_ + + return DelegateClass + + diff --git a/RE/paddleext/paddleext/torchapi/cuda.py b/RE/paddleext/paddleext/torchapi/cuda.py new file mode 100644 index 00000000..23c774c3 --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/cuda.py @@ -0,0 +1,27 @@ + +import paddle + +_initialized=True +def is_available(): + + return paddle.device.cuda.device_count() > 0 + +def manual_seed_all(seed): + paddle.seed(seed) + + +def manual_seed(seed): + paddle.seed(seed) + + +def set_device(device): + return paddle.set_device(device) + + +def empty_cache(): + return + + +def device_count(): + + return paddle.device.cuda.device_count() \ No newline at end of file diff --git a/RE/paddleext/paddleext/torchapi/data.py b/RE/paddleext/paddleext/torchapi/data.py new file mode 100644 index 00000000..bd61f562 --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/data.py @@ -0,0 +1,5 @@ +""" +data for paddle +""" + +from paddle.io import DataLoader, Dataset \ No newline at end of file diff --git a/RE/paddleext/paddleext/torchapi/distributed.py b/RE/paddleext/paddleext/torchapi/distributed.py new file mode 100644 index 00000000..b9bf7698 --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/distributed.py @@ -0,0 +1,17 @@ + +import paddle + + +def is_available(): + return True + +DISTRIBUTED = False + +def is_initialized(): + return DISTRIBUTED + + +def init_process_group(*args, **kwargs): + + pass + diff --git a/RE/paddleext/paddleext/torchapi/functional.py b/RE/paddleext/paddleext/torchapi/functional.py new file mode 100644 index 00000000..8d2ad2d2 --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/functional.py @@ -0,0 +1,485 @@ +import builtins +from collections import namedtuple + +import paddle +from paddle import Tensor +import numpy as np + +from paddle import is_tensor + +from paddle import less_than, less_equal, greater_than, greater_equal, equal + +from paddle.nn.functional import * + +from paddle import arange, ones_like, zeros_like, ones + +from paddle import logical_and, logical_not, 
logical_or, logical_xor + +from paddle import all, any + +from paddle import argmax, argmin + +from paddle import stack + +from paddle import einsum + +from paddle import inverse + +from paddle.linalg import * + + +def max_along_dim(input, dim=None, keepdim=False, *, out=None): + + if dim is None: + result = paddle.max(input) + return paddle.ones([], dtype=result.dtype) * result.item() + + max_val = paddle.max(input, axis=dim, keepdim=keepdim) + max_index = paddle.argmax(input, axis=dim) + + if out is not None: + out[0] = max_val + out[1] = max_index + + return (max_val, max_index) + +def max(input, *args, **kwargs): + + if len(args) == 0: + return max_along_dim(input, **kwargs) + + if isinstance(args[0], (int, list, tuple)): + return max_along_dim(input, *args, **kwargs) + elif isinstance(args[0], Tensor): + return paddle.maximum(input, args[0], *args[1:], **kwargs) + else: + raise Exception(f"unknown parameter combination") + + +def min_along_dim(input, dim=None, keepdim=False, *, out=None): + + if dim is None: + result = paddle.min(input) + return paddle.ones([], dtype=result.dtype) * result.item() + + min_val = paddle.min(input, axis=dim, keepdim=keepdim) + min_index = paddle.argmin(input, axis=dim) + + if out is not None: + out[0] = min_val + out[1] = min_index + + return (min_val, min_index) + + +def min(input, *args, **kwargs): + + if len(args) == 0: + return min_along_dim(input, **kwargs) + + if isinstance(args[0], (int, list, tuple)): + return min_along_dim(input, *args, **kwargs) + elif isinstance(args[0], Tensor): + return paddle.minimum(input, args[0], *args[1:], **kwargs) + else: + raise Exception(f"unknown parameter combination") + + +def lt(a, b): + if np.isscalar(a) or np.isscalar(b): + return a < b + else: + return less_than(a, b) + + +def le(a, b): + if np.isscalar(a) or np.isscalar(b): + return a <= b + else: + return less_equal(a, b) + + +def gt(a, b): + if np.isscalar(a) or np.isscalar(b): + return a > b + else: + return greater_than(a, b) + + +def ge(a, b): + if np.isscalar(a) or np.isscalar(b): + return a >= b + else: + return greater_equal(a, b) + + +def eq(a, b): + if np.isscalar(a) or np.isscalar(b): + return a == b + else: + return equal(a, b) + + +def standardize_dtype(type): + + if type == int: + return paddle.int64 + elif type == float: + return paddle.float32 + + return type + +def empty(*size, dtype=None, device=None): + + if len(size) == 1 and isinstance(size[0], (list, tuple)): + size = size[0] + + dtype = standardize_dtype(dtype) + x = paddle.empty(size, dtype=dtype) + + return x + +def zeros(*size, dtype=None, device=None): + if len(size) == 1 and isinstance(size[0], (list, tuple)): + size = size[0] + + dtype = standardize_dtype(dtype) + x = paddle.zeros(size, dtype=dtype) + # if device is not None: + # x = x.to(device) + return x + + +def ones(*size, dtype=None, device=None): + if len(size) == 1 and isinstance(size[0], (list, tuple)): + size = size[0] + dtype = standardize_dtype(dtype) + x = paddle.ones(size, dtype=dtype) + # if device is not None: + # x = x.to(device) + return x + + +def rand(*size, dtype=None, device=None): + if len(size) == 1 and isinstance(size[0], (list, tuple)): + size = size[0] + dtype = standardize_dtype(dtype) + x = paddle.rand(size, dtype=dtype) + # if device is not None: + # x = x.to(device) + return x + + +def randint(low=None, high=None, size=None, dtype=None, name=None, device=None): + + arg1 = low + arg2 = high + arg3 = size + + dtype = standardize_dtype(dtype) + + if dtype == paddle.int32 or dtype == paddle.int64: + 
int_dtype = dtype + target_dtype = None + else: + int_dtype = None + target_dtype = None + + if arg3 is not None: + assert isinstance(arg3, (list, tuple)) + if low is None and high is not None: + arg1 = high + arg2 = None + result = paddle.randint(low=arg1, high=arg2, shape=arg3, dtype=int_dtype, name=name) + return result.astype(target_dtype) if target_dtype else result + else: + assert isinstance(arg2, (list, tuple)) + result = paddle.randint(low=arg1, high=None, shape=arg2, dtype=int_dtype, name=name) + return result.astype(target_dtype) if target_dtype else result + + +def randn(*size, out=None, dtype=None, device=None): + if len(size) == 1 and isinstance(size[0], (list, tuple)): + size = size[0] + + dtype = standardize_dtype(dtype) + x = paddle.randn(size, dtype=dtype) + + if out is not None: + paddle.assign(x, out) + return out + + return x + + +def manual_seed_all(seed): + paddle.seed(seed) + + +def manual_seed(seed): + paddle.seed(seed) + + +def scalar_dtype(x): + from . import core + return getattr(core, type(x).__name__) + + +def tensor(x, dtype=None, device=None): + if np.isscalar(x): + if dtype is None: + dtype = scalar_dtype(x) + result = paddle.ones([], dtype=dtype) + if np.isnan(x): + result = (result * (-1)).sqrt() + else: + result.fill_(x) + return result + + return paddle.to_tensor(x, dtype=dtype) + +def from_numpy(x): + return paddle.to_tensor(x) + + +cat = paddle.concat + + +# different meaning of scatter +# in tensorflow/ paddle, scatter is : +# for idx, l in enumerate(index): +# output[l] = update[idx] +# in torch, scatter is: +# for i, j, k: +# output[i, j, index[i,j,k]] = update[i, j, k] + + +def sum(x, dim=None, dtype=None, keepdim=False, name=None): + + if x.ndim == 0: + return x + + result = paddle.sum(x, axis=dim, dtype=dtype, keepdim=keepdim, name=name) + + dim_len = 1 if np.isscalar(dim) else x.ndim if dim is None else len(dim) + + if not keepdim and x.ndim == dim_len: + return tensor(result.item(), dtype=result.dtype) + else: + return result + + +def nonzero(input, *, out=None, as_tuple=False): + + result = paddle.nonzero(input, as_tuple=as_tuple) + if not as_tuple: + if out is not None: + paddle.assign(result, out) + return out + else: + return result + else: + assert out is None + return tuple([x.squeeze(-1) for x in result]) + + +def where(condition, x=None, y=None, name=None): + + if x is not None and y is not None: + assert is_tensor(x) or is_tensor(y) + + if np.isscalar(x): + x = paddle.ones_like(condition, dtype=scalar_dtype(x)) * x + if x.ndim == 0: + x = paddle.ones_like(condition, dtype=x.dtype) * x.item() + + if np.isscalar(y): + y = paddle.ones_like(condition, dtype=scalar_dtype(y)) * y + if x.ndim == 0: + y = paddle.ones_like(condition, dtype=y.dtype) * y.item() + + return paddle.where(condition, x, y, name=name) + + elif x is None and y is None: + result = nonzero(condition, as_tuple=True) + + return result + else: + raise Exception("x and y must be None or not None at the sametime") + + +def is_nonzero(input): + + assert paddle.numel(input) == 1 + + return input.item() != 0.0 + + +def allclose(input, other, rtol=1e-05, atol=1e-08, equal_nan=False, name=None): + + if np.isscalar(input): + input = tensor(input) + if np.isscalar(other): + other = tensor(other) + + return paddle.allclose(input.float(), other.float(), rtol=rtol, atol=atol, equal_nan=equal_nan, name=name) + + +def scatter(input: Tensor, dim, index, value) -> Tensor: + if input.ndim == 1: + output = paddle.scatter(input, index, value, overwrite=True) + else: + +# index, selected = 
paddle.unique(index, axis=dim, return_index=True) +# if isinstance(value, Tensor): +# value = paddle.index_select(value, selected, axis=dim) + + grids = [paddle.arange(index.shape[x]) for x in range(index.ndim)] + inner_indexes = list(paddle.meshgrid(*grids)) + inner_indexes[dim] = index + inner_indexes = [x.flatten() for x in inner_indexes] + inner_indexes = paddle.stack(inner_indexes, axis=1) + + value_shape = list(inner_indexes.shape[:-1]) + list(input.shape[inner_indexes.shape[-1]:]) + + if paddle.is_tensor(value): + value = paddle.reshape(value, value_shape) + elif isinstance(value, (builtins.bool, builtins.int, builtins.float, np.integer, np.float32, np.float64)): + value = paddle.full(shape=value_shape, fill_value=value) + else: + raise Exception(f"unknown value type: {type(value)}") + + to_overwrite = paddle.scatter_nd(inner_indexes, value, shape=input.shape) + condition = paddle.scatter_nd(inner_indexes, paddle.ones_like(value), shape=input.shape) + output = paddle.where(condition > 0, to_overwrite.float(), input.float()).cast(input.dtype) + + return output + +def gather(x,dim,index): + index_shape=index.shape + index_flatten=index.flatten() + if dim<0: + dim=len(x.shape)+dim + nd_index=[] + for k in range(len(x.shape)): + if k==dim: + nd_index.append(index_flatten) + else: + reshape_shape=[1]*len(x.shape) + reshape_shape[k]=x.shape[k] + dim_index=paddle.expand( paddle.reshape(paddle.arange(x.shape[k],dtype=index.dtype), reshape_shape), index_shape).flatten() + nd_index.append(dim_index) + + ind2 = paddle.transpose(paddle.stack(nd_index),[1, 0]) + # ind2 = paddle.stack(nd_index).transpose([1, 0]) + paddle_out = paddle.gather_nd(x, ind2).reshape(index_shape) + return paddle_out + + +def scatter_(input: Tensor, dim, index, value): + + output = scatter(input, dim, index, value) + # return output + paddle.assign(output, input) + + return input + + + +def scatter_add(input: Tensor, dim, index, update) -> Tensor: + # donot use scatter with overwrite=False even for 1-d case; + # It does not produce correct result for duplicated indexes + # if input.ndim == 1: + # output = paddle.scatter(input, index, update, overwrite=False) + # else: + if index.ndim > 1: + grids = [paddle.arange(index.shape[x]) for x in range(index.ndim)] + inner_indexes = list(paddle.meshgrid(*grids)) + inner_indexes[dim] = index + else: + inner_indexes = [index] + inner_indexes = [x.flatten() for x in inner_indexes] + inner_indexes = paddle.stack(inner_indexes, axis=1) + + update_shape = list(inner_indexes.shape[:-1]) + list(input.shape[inner_indexes.shape[-1]:]) + update = paddle.reshape(update, update_shape) + output = paddle.scatter_nd_add(input, inner_indexes, update) + + return output + + +def scatter_add_(input: Tensor, dim, index, update) -> Tensor: + output = scatter_add(input, dim, index, update) + paddle.assign(output, input) + # return output + return input + + +def norm(input, p='fro', dim=None, keepdim=False, out=None, dtype=None): + + result = paddle.linalg.norm(input, p, axis=dim, keepdim=keepdim) + if dtype is not None: + result = result.cast(dtype) + + if out is not None: + out.assign(result) + + return result + +def isinf(x, name=None): + if x.dtype == paddle.bool: + return paddle.zeros_like(x, dtype=paddle.bool) + else: + return paddle.isinf(x, name=name) + +def isnan(x, name=None): + if x.dtype == paddle.bool: + return paddle.zeros_like(x, dtype=paddle.bool) + else: + return paddle.isnan(x, name=name) + +def broadcast_to(x, shape, name=None): + + if len(shape) == 1 and shape[0] == 0: + assert 
x.numel() == 1 + return tensor(x.item()) + else: + return paddle.broadcast_to(x, shape, name) + + +def as_tensor(data, dtype=None, device=None): + + return paddle.to_tensor(data, dtype=dtype) + + +TopKResult = namedtuple("TopKResult", ["values", "indices"]) +def topk(input, k, dim=None, largest=True, sorted=True, *, out=None): + + result, indice = paddle.topk(input, k, axis=dim, largest=largest, sorted=sorted) + + if out is not None: + out[0].set_value(result) + out[1].set_value(indice) + + return TopKResult(values=result, indices=indice) + + +def split(tensor, split_size_or_sections, dim=0): + """ + paddle interface is different from pytorch + + Args: + tensor: + split_size_or_sections: + dim: + + Returns: + + """ + if isinstance(split_size_or_sections, int): + sizes = [split_size_or_sections] * (tensor.shape[dim] // split_size_or_sections) + if tensor.shape[dim] % split_size_or_sections != 0: + sizes.append(tensor.shape[dim] % split_size_or_sections) + split_size_or_sections = sizes + + return paddle.split(tensor, split_size_or_sections, axis=dim) \ No newline at end of file diff --git a/RE/paddleext/paddleext/torchapi/machine.py b/RE/paddleext/paddleext/torchapi/machine.py new file mode 100644 index 00000000..bb0ee241 --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/machine.py @@ -0,0 +1,35 @@ +""" +machine for paddle +""" + +import paddle + + +class PaddleTrainer(object): + """ + PaddleTrainer + """ + + def __init__(self, machine, loss, optimizer, + evaluator, *args, **kwargs): + + self.model = paddle.Model(machine) + + self.model.prepare(optimizer=optimizer, loss=loss, + metrics=evaluator) + + def fit(self, train_data_streams): + """ + + Args: + train_dataloader (): + val_dataloaders (): + test_dataloaders (): + + Returns: + + """ + + self.model.fit(train_data_streams.train, eval_data=train_data_streams.dev) + +Trainer = PaddleTrainer \ No newline at end of file diff --git a/RE/paddleext/paddleext/torchapi/metrics.py b/RE/paddleext/paddleext/torchapi/metrics.py new file mode 100644 index 00000000..8d5f431c --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/metrics.py @@ -0,0 +1,7 @@ + +import sys + +import paddlemetrics + +sys.modules[__name__] = paddlemetrics + diff --git a/RE/paddleext/paddleext/torchapi/nn/__init__.py b/RE/paddleext/paddleext/torchapi/nn/__init__.py new file mode 100644 index 00000000..64b48d08 --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/nn/__init__.py @@ -0,0 +1,47 @@ +import paddle +import random +import numpy as np + +ModuleBase = paddle.nn.Layer +ModuleDict = paddle.nn.LayerDict +ModuleList = paddle.nn.LayerList + +from paddle.nn import * + +Conv2d = Conv2D +Conv3d = Conv3D +from . import functional +from paddle.nn import initializer + +from . 
import init + +def Parameter(data, requires_grad=True): + """ + + Args: + data: + requires_grad: + + Returns: + + """ + + param = paddle.create_parameter(data.shape, dtype=data.dtype, default_initializer=initializer.Assign(data)) + + param.stop_gradient = not requires_grad + + return param + +from paddle.fluid import framework + +class Module(paddle.nn.Layer): + """ + Module with add_parameter + """ + + def __setattr__(self, key, value): + + if isinstance(value, framework.Parameter): + self.add_parameter(key, value) + else: + super().__setattr__(key, value) \ No newline at end of file diff --git a/RE/paddleext/paddleext/torchapi/nn/functional.py b/RE/paddleext/paddleext/torchapi/nn/functional.py new file mode 100644 index 00000000..cf489074 --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/nn/functional.py @@ -0,0 +1,15 @@ + + +import paddle +from more_itertools import chunked +from paddle.nn.functional import * + +def pad(input, pad, mode='constant', value=0.0): + + pad = sum(reversed(list(chunked(pad, 2))), []) + + if len(pad) < 2 * input.ndim: + pad = [0] * (2 * input.ndim - len(pad)) + pad + + return paddle.nn.functional.pad(input, pad, mode=mode, value=value) + diff --git a/RE/paddleext/paddleext/torchapi/nn/init.py b/RE/paddleext/paddleext/torchapi/nn/init.py new file mode 100644 index 00000000..4a74a80c --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/nn/init.py @@ -0,0 +1,49 @@ +""" +init function for paddle +""" +import paddle + + +def normal_(tensor, mean=0.0, std=1.0): + """ + + Args: + tensor: + mean: + std: + + Returns: + + """ + + paddle.assign(paddle.normal(mean=mean, std=std, shape=tensor.shape), tensor) + + return tensor + +def zeros_(tensor): + """ + + Args: + tensor: + + Returns: + + """ + + paddle.assign(paddle.zeros_like(tensor), tensor) + + return tensor + +def ones_(tensor): + """ + + Args: + tensor: + + Returns: + + """ + + paddle.assign(paddle.ones_like(tensor), tensor) + + return tensor \ No newline at end of file diff --git a/RE/paddleext/paddleext/torchapi/optim/__init__.py b/RE/paddleext/paddleext/torchapi/optim/__init__.py new file mode 100644 index 00000000..c31d1d6c --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/optim/__init__.py @@ -0,0 +1,48 @@ +from functools import partial + +from paddle.optimizer import * + +from . import lr_scheduler + +# import sys +# this_module = sys.modules[__name__] +# import inspect +# +# def paddle_optim_class_creator(paddle_optim_class, *args, **kwargs): +# """ +# +# Args: +# paddle_optim_class: +# *args: +# **kwargs: +# +# Returns: +# +# """ +# if "params" in kwargs: +# kwargs["parameters"] = kwargs["params"] +# del kwargs["params"] +# if "lr" in kwargs: +# kwargs["learning_rate"] = kwargs["lr"] +# del kwargs["lr"] +# +# return paddle_optim_class(*args, **kwargs) +# +# from . 
import lr +# +# class PaddleOptimModuleProxy(object): +# +# def __getattribute__(self, *args, **kwargs): +# # Perform custom logic here +# +# obj = object.__getattribute__(this_module, *args, **kwargs) +# +# if inspect.isclass(obj) and obj.__module__.startswith("paddle.optimization"): +# print(obj.__module__) +# return partial(paddle_optim_class_creator, obj) +# else: +# return obj +# +# +# +# sys.modules[__name__] = PaddleOptimModuleProxy() \ No newline at end of file diff --git a/RE/paddleext/paddleext/torchapi/optim/lr_scheduler.py b/RE/paddleext/paddleext/torchapi/optim/lr_scheduler.py new file mode 100644 index 00000000..1ba3040a --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/optim/lr_scheduler.py @@ -0,0 +1,34 @@ +from functools import partial + +from paddle.optimizer.lr import * + +StepLR = StepDecay +ExponentialLR = ExponentialDecay + +# +# def paddle_lr_class_creator(paddle_lr_class, *args, **kwargs): +# +# if "lr" in kwargs: +# kwargs["learning_rate"] = kwargs["lr"] +# del kwargs["lr"] +# +# return paddle_lr_class(*args, **kwargs) +# +# import sys +# this_module = sys.modules[__name__] +# import inspect +# +# class PaddleLRModuleProxy(object): +# +# def __getattribute__(self, *args, **kwargs): +# # Perform custom logic here +# +# obj = object.__getattribute__(this_module, *args, **kwargs) +# +# if inspect.isclass(obj) and obj.__module__.startswith("paddle.optimization"): +# print("LR", obj.__module__) +# return partial(paddle_lr_class_creator, obj) +# else: +# return obj +# +# sys.modules[__name__] = PaddleLRModuleProxy() \ No newline at end of file diff --git a/RE/paddleext/paddleext/torchapi/sampler.py b/RE/paddleext/paddleext/torchapi/sampler.py new file mode 100644 index 00000000..7e4f1cb5 --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/sampler.py @@ -0,0 +1,154 @@ +""" +paddle operation for sampler +""" +from typing import Iterator, Optional, Sequence, List, TypeVar, Generic, Sized + +import paddle + +from paddle.io import Sampler, DistributedBatchSampler, SequenceSampler, RandomSampler +import math + + +def identity(x): + """ + + Args: + x (): + + Returns: + + """ + return x + + + +class BatchSampler(Sampler): + r"""Wraps another sampler to yield a mini-batch of indices. + + Args: + sampler (Sampler or Iterable): Base sampler. Can be any iterable object + batch_size (int): Size of mini-batch. + drop_last (bool): If ``True``, the sampler will drop the last batch if + its size would be less than ``batch_size`` + + Example: + >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=False)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] + >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=True)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8]] + """ + + def __init__(self, sampler: Sampler, batch_size: int, drop_last: bool) -> None: + # Since collections.abc.Iterable does not check for `__getitem__`, which + # is one way for an object to be an iterable, we don't do an `isinstance` + # check here. 
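        # Editor's note (illustrative comment, not part of the original patch):
        # paddle.io.Sampler is initialised without a data_source because this
        # wrapper only re-batches the indices produced by `sampler`; length and
        # ordering are fully delegated to that inner sampler, mirroring
        # torch.utils.data.BatchSampler.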
+ super().__init__() + if not isinstance(batch_size, int) or isinstance(batch_size, bool) or \ + batch_size <= 0: + raise ValueError("batch_size should be a positive integer value, " + "but got batch_size={}".format(batch_size)) + if not isinstance(drop_last, bool): + raise ValueError("drop_last should be a boolean value, but got " + "drop_last={}".format(drop_last)) + self.sampler = sampler + self.batch_size = batch_size + self.drop_last = drop_last + + + def __iter__(self) -> Iterator[List[int]]: + batch = [] + for idx in self.sampler: + batch.append(idx) + if len(batch) == self.batch_size: + yield batch + batch = [] + if len(batch) > 0 and not self.drop_last: + yield batch + + def __len__(self) -> int: + # Can only be called if self.sampler has __len__ implemented + # We cannot enforce this condition, so we turn off typechecking for the + # implementation below. + # Somewhat related: see NOTE [ Lack of Default `__len__` in Python Abstract Base Classes ] + if self.drop_last: + return len(self.sampler) // self.batch_size # type: ignore[arg-type] + else: + return (len(self.sampler) + self.batch_size - 1) // self.batch_size # type: ignore[arg-type] + + + +class SequentialSampler(Sampler): + r"""Samples elements sequentially, always in the same order. + + Args: + data_source (Dataset): dataset to sample from + """ + data_source: Sized + + def __init__(self, data_source: Sized) -> None: + self.data_source = data_source + + def __iter__(self) -> Iterator[int]: + return iter(range(len(self.data_source))) + + def __len__(self) -> int: + return len(self.data_source) + +class SubsetRandomSampler(Sampler): + r"""Samples elements randomly from a given list of indices, without replacement. + + Arguments: + indices (sequence): a sequence of indices + generator (Generator): Generator used in sampling. + """ + indices: Sequence[int] + + def __init__(self, indices: Sequence[int]) -> None: + self.indices = indices + + def __iter__(self): + return (self.indices[i] for i in paddle.randperm(len(self.indices))) + + def __len__(self): + return len(self.indices) + + + +class DistributedSampler(Sampler): + """ Iterable wrapper that distributes data across multiple workers. + + Args: + iterable (iterable) + num_replicas (int, optional): Number of processes participating in distributed training. + rank (int, optional): Rank of the current process within ``num_replicas``. 
+ + Example: + >>> list(DistributedSampler(range(10), num_replicas=2, rank=0)) + [0, 2, 4, 6, 8] + >>> list(DistributedSampler(range(10), num_replicas=2, rank=1)) + [1, 3, 5, 7, 9] + """ + + def __init__(self, iterable, num_replicas=None, rank=None): + self.iterable = iterable + self.num_replicas = num_replicas + self.rank = rank + + if num_replicas is None or rank is None: # pragma: no cover +# if not paddle.distributed.is_initialized(): +# raise RuntimeError('Requires `torch.distributed` to be initialized.') + + self.num_replicas = ( + paddle.distributed.get_world_size() if num_replicas is None else num_replicas) + self.rank = paddle.distributed.get_rank() if rank is None else rank + + if self.rank >= self.num_replicas: + raise IndexError('`rank` must be smaller than the `num_replicas`.') + + def __iter__(self): + return iter( + [e for i, e in enumerate(self.iterable) if (i - self.rank) % self.num_replicas == 0]) + + def __len__(self): + return len(self.iterable) + diff --git a/RE/paddleext/paddleext/torchapi/tensor_.py b/RE/paddleext/paddleext/torchapi/tensor_.py new file mode 100644 index 00000000..838c75a8 --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/tensor_.py @@ -0,0 +1,547 @@ +""" +paddle tensor +""" +from functools import partial + +import numpy as np +from collections.abc import Iterable + +from . import paddle_delegate_func +from .functional import * +import paddle + +""" +paddle tensor +""" +import types +import paddle +from paddle import Tensor + +# just for type hint. If there are statements like isinstance(x, FloatTensor), this may cause error +FloatTensor = Tensor + +def size(self, dim=None): + shape = self.shape + if dim is None: + return shape + else: + return shape[dim] + + +# def __new__(cls, *args, **kwargs): +# +# obj = cls.__default_new__(cls, *args, **kwargs) +# +# setattr(obj, "size", types.MethodType(size, obj)) +# +# return obj +# +# setattr(Tensor, "__default_new__", Tensor.__new__) +# setattr(Tensor, "__new__", __new__) + + +def bool_(self): + return self.astype("bool") + +def float_(self): + return self.astype('float32') + + +def double_(self): + return self.astype("float64") + + +def int_(self): + return self.astype("int32") + + +def long_(self): + return self.astype('int64') + + +def expand(self, *sizes): + if isinstance(sizes[0], Iterable): + sizes = sizes[0] + ##handle -1 case + if len(sizes) > len(self.shape): + for _ in range(len(sizes) - len(self.shape)): + self = self.unsqueeze(dim=0) + expand_times = [x // y if x >= y else 1 for x, y in zip(sizes, self.shape)] + x = paddle.fluid.layers.expand(self, expand_times, name=None) + return x + + +def masked_fill(self, mask, value): + if self.ndim == 0: + assert mask.ndim == 0 + if mask.item(): + return paddle.full([], value, self.dtype) + else: + return self + + y = paddle.full(self.shape, value, self.dtype) + mask_shape = [1] * (self.ndim - mask.ndim) + mask.shape + mask = paddle.reshape(mask, mask_shape) + mask = paddle.expand_as(mask, self) + new_values = paddle.where(mask, y, self) + return new_values + # mask_float = mask.astype("float32") + # if self.dtype == paddle.bool: + # self_float = self.astype("float32") + # else: + # self_float = self + # result = self_float * (1 - mask_float) + mask_float * value + # if self.dtype == paddle.bool: + # result = result.astype(paddle.bool) + # return result + +# def masked_fill_(self, mask, value): +# +# new_values = masked_fill(self, mask, value) +# paddle.assign(new_values, self) +# +# return self + + +def to(self, arg): + if isinstance(arg, paddle.dtype): 
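        # Editor's note (illustrative comment, not part of the original patch):
        # torch's Tensor.to() accepts a dtype, another tensor, or a device. Only
        # the two dtype-like cases are honoured here; device arguments fall
        # through to the final `return self`, since device placement under the
        # paddle backend is set globally (e.g. via cuda.set_device in this
        # package) rather than per tensor.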
+ return self.astype(arg) + elif isinstance(arg, Tensor): + return self.astype(arg.dtype) + else: + return self + +def is_floating_point(self): + return self.dtype in {paddle.float16, paddle.float32, paddle.float64} + + +def reshape(self, *size): + + if len(size) == 1 and isinstance(size[0], Iterable): + size = size[0] + + return paddle.reshape(self, size) + + +def view(self, *size): + if len(size) == 1 and isinstance(size[0], Iterable): + size = size[0] + + return reshape(self, size) + +def view_as(self, other): + + return view(self, *other.size()) + + +Tensor.__native__size = Tensor.size + +Tensor.device = None +Tensor.float = float_ +Tensor.double = double_ +Tensor.int = int_ +Tensor.long = long_ +Tensor.bool = bool_ +Tensor.scatter_explicit_index = Tensor.scatter +Tensor.scatter = scatter +Tensor.scatter_explicit_index_ = Tensor.scatter_ +Tensor.scatter_ = scatter_ +Tensor.scatter_add = scatter_add +Tensor.scatter_add_ = scatter_add_ +Tensor.expand = expand +Tensor.masked_fill = masked_fill +#Tensor.masked_fill_ = masked_fill_ +Tensor.to = to +Tensor.is_floating_point = is_floating_point +Tensor.reshape = reshape +Tensor.view = view +Tensor.view_as = view_as + +Tensor.__invert__ = paddle.logical_not + +Tensor.__native__numel = Tensor.numel +def numel(x): + return x.__native__numel().item() + +Tensor.numel = numel + +import math + +class SizeObject(int): + + def __new__(cls, sizes, *args, **kwargs): + size = int(math.prod(sizes)) + instance = int.__new__(cls, size, *args, **kwargs) + instance.sizes = sizes + return instance + + def __call__(self, index=None): + if index is None: + return self.sizes + else: + return self.sizes[index] + +Tensor.size = property(lambda self: SizeObject(self.shape)) + + +def flatten(self, *args, **kwargs): + + if self.dtype == paddle.bool: + return flatten(self.int(), *args, **kwargs) > 0 + else: + return paddle.flatten(self, *args, **kwargs) + +Tensor.flatten = flatten + + +Tensor.__getitem__official__ = Tensor.__getitem__ + +import builtins + +def getitem(self, args): + + if self.dtype == paddle.bool: + return getitem(self.int(), args) > 0 + + if isinstance(args, (list, tuple)): + ellipsis_num = builtins.sum(x is Ellipsis for x in args) + if ellipsis_num > 1: + raise Exception(f"multiple ellipsis found in args: {args}") + elif ellipsis_num == 1: + args = list(args) + ellips_idx = args.index(Ellipsis) + args_before_ellips = args[:ellips_idx] + args_after_ellips = args[ellips_idx+1:] + ommited_dims = [builtins.slice(None, None, None) for _ in range(self.ndim - len(args) + 1)] + args = tuple(args_before_ellips + ommited_dims + args_after_ellips) + + return self.__getitem__official__(args) + + elif isinstance(args, Tensor): + if args.dtype == paddle.bool and args.ndim > 1: + # paddle do not support boolean indexing with ndim > 1 + return self.flatten(start_axis=0, stop_axis=args.ndim-1)[args.flatten().nonzero()] + if args.ndim == 0: + assert args.dtype == paddle.bool + assert self.ndim == 0 + return tensor(self.reshape((1,))[args.reshape((1,))].item(), dtype=self.dtype) + + return self.__getitem__official__(args) + +Tensor.__getitem__ = getitem + +Tensor.__setitem__official__ = Tensor.__setitem__ + +def setitem(self, index, value): + + if isinstance(index, Tensor): + if self.ndim == 0: + index = index.item() + assert type(index) == bool + if index: + self.fill_(value) + return + + if index.dtype == paddle.bool and (paddle.any(paddle.isnan(self)) or paddle.any(paddle.isinf(self))): + + result = masked_fill(self, index, value) + self.set_value(result) + return + 
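    # Editor's note (illustrative comment, not part of the original patch): every
    # case not intercepted above is forwarded to paddle's native __setitem__. The
    # masked_fill branch above appears to work around unreliable boolean-mask
    # assignment on tensors that already contain nan/inf values (inferred from
    # the guard, not verified against paddle itself).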
+ self.__setitem__official__(index, value) + +Tensor.__setitem__ = setitem + +def getattribute(self, *args, **kwargs): + # Perform custom logic here + + obj = object.__getattribute__(self, *args, **kwargs) + + if isinstance(obj, types.MethodType) and not obj.__module__.startswith("paddleext.torchapi."): + + return partial(paddle_delegate_func, obj) + else: + return obj + + +Tensor.__getattribute__ = getattribute + +Tensor.sum = sum + + + +def permute(self, *perm): + + if len(perm) == 1 and isinstance(perm[0], Iterable): + perm = perm[0] + + assert len(perm) == self.ndim + perm = [self.ndim + x if x < 0 else x for x in perm] ##not allow negative values + + if self.dtype == paddle.bool: + return permute(self.int(), * perm) > 0 + else: + return paddle.transpose(self, perm) + +Tensor.permute = permute + + +def transpose(self, *perm): + # if len(perm)==2 and len(self.shape)>2: + if isinstance(perm[0], Iterable): + assert len(perm) == 1 + perm = perm[0] + + if len(perm) == 2 and len(perm) < self.ndim: + + perm = [self.ndim + x if x < 0 else x for x in perm] + dim1, dim2 = perm + perm = list(range(self.rank())) + perm[dim1] = dim2 + perm[dim2] = dim1 + + return self.permute(*perm) + else: + return paddle.transpose(self, perm) + + +Tensor.transpose = transpose + +def contiguous(self): + return self + +Tensor.contiguous = contiguous + + +Tensor.__lt__origin__ = Tensor.__lt__ +def __lt__(self, other): + if self.ndim == 0 and np.isscalar(other): + other = tensor(other) + return self.__lt__origin__(other) +Tensor.__lt__ = __lt__ + + +Tensor.__le__origin__ = Tensor.__le__ +def __le__(self, other): + if self.ndim == 0 and np.isscalar(other): + other = tensor(other) + return self.__le__origin__(other) +Tensor.__le__ = __le__ + + +Tensor.__gt__origin__ = Tensor.__gt__ +def __gt__(self, other): + if self.ndim == 0 and np.isscalar(other): + other = tensor(other) + return self.__gt__origin__(other) +Tensor.__gt__ = __gt__ + + +Tensor.__ge__origin__ = Tensor.__ge__ +def __ge__(self, other): + if self.ndim == 0 and np.isscalar(other): + other = tensor(other) + return self.__ge__origin__(other) +Tensor.__ge__ = __ge__ + + +Tensor.__eq__origin__ = Tensor.__eq__ +def __eq__(self, other): + if self.ndim == 0 and np.isscalar(other): + other = tensor(other) + return self.__eq__origin__(other) +Tensor.__eq__ = __eq__ + + +Tensor.__ne__origin__ = Tensor.__ne__ +def __ne__(self, other): + if self.ndim == 0 and np.isscalar(other): + other = tensor(other) + return self.__ne__origin__(other) +Tensor.__ne__ = __ne__ + + +def __or__(self, other): + return paddle.logical_or(self.bool(), other.bool()) +Tensor.__or__ = __or__ + +def __and__(self, other): + return paddle.logical_or(self.bool(), other.bool()) +Tensor.__and__ = __and__ + + +Tensor.__native__any = Tensor.any +def any(x, dim=None, keepdim=False, name=None): + if isinstance(x, Tensor) and x.ndim == 0: + assert dim is None + return x + else: + return x.__native__any(axis=dim, keepdim=keepdim, name=name) + +Tensor.any = any + +Tensor.__native__all = Tensor.all +def all(x, dim=None, keepdim=False, name=None): + + if isinstance(x, Tensor) and x.ndim == 0: + assert dim is None + return x + else: + return x.__native__all(axis=dim, keepdim=keepdim, name=name) + +Tensor.all = all + +Tensor.__native__add__ = Tensor.__add__ +#Tensor.__native__iadd__ = Tensor.__iadd__ +def add(x, y): + + tensor_out = isinstance(x, Tensor) or isinstance(y, Tensor) + + out_dtype = x.dtype if isinstance(x, Tensor) else y.dtype if isinstance(y, Tensor) else None + + if isinstance(x, Tensor) and 
x.ndim == 0: + x = x.item() + if isinstance(y, Tensor) and y.ndim == 0: + y = y.item() + + if isinstance(x, Tensor): + return Tensor.__native__add__(x, y) + elif isinstance(y, Tensor): + return Tensor.__native__add__(y, x) + else: + result = x + y + if np.isscalar(result) and tensor_out: + return tensor(result, dtype=out_dtype) + else: + return result + + +# def iadd(x, y): +# if isinstance(y, Tensor) and y.ndim == 0: +# y = y.item() +# +# return Tensor.__native__iadd__(x, y) + +Tensor.__add__ = add +Tensor.__radd__ = add +# Tensor.__iadd__ = iadd + +Tensor.__native__sub__ = Tensor.__sub__ +Tensor.__native__rsub__ = Tensor.__rsub__ + +def subtract(x, y): + tensor_out = isinstance(x, Tensor) or isinstance(y, Tensor) + + out_dtype = x.dtype if isinstance(x, Tensor) else y.dtype if isinstance(y, Tensor) else None + + if isinstance(x, Tensor) and x.ndim == 0: + x = x.item() + if isinstance(y, Tensor) and y.ndim == 0: + y = y.item() + + if isinstance(x, Tensor): + return Tensor.__native__sub__(x, y) + elif isinstance(y, Tensor): + return Tensor.__native__rsub__(y, x) + else: + result = x - y + if np.isscalar(result) and tensor_out: + return tensor(result, dtype=out_dtype) + else: + return result + +def rsub(x, y): + + if isinstance(y, Tensor) and y.ndim == 0: + y = y.item() + + return Tensor.__native__rsub__(x, y) + + +Tensor.__sub__ = subtract +Tensor.__rsub__ = rsub + +Tensor.__native__mul__ = Tensor.__mul__ +def multiply(x, y): + tensor_out = isinstance(x, Tensor) or isinstance(y, Tensor) + + out_dtype = x.dtype if isinstance(x, Tensor) else y.dtype if isinstance(y, Tensor) else None + + if isinstance(x, Tensor) and x.ndim == 0: + x = x.item() + if isinstance(y, Tensor) and y.ndim == 0: + y = y.item() + + if isinstance(x, Tensor): + return Tensor.__native__mul__(x, y) + elif isinstance(y, Tensor): + return Tensor.__native__mul__(y, x) + else: + result = x * y + if np.isscalar(result) and tensor_out: + return tensor(result, dtype=out_dtype) + else: + return result + +Tensor.__mul__ = multiply +Tensor.__rmul__ = multiply + +Tensor.__native__truediv__ = Tensor.__truediv__ +Tensor.__native__rdiv__ = Tensor.__rdiv__ + +def divide(x, y): + tensor_out = isinstance(x, Tensor) or isinstance(y, Tensor) + + out_dtype = x.dtype if isinstance(x, Tensor) else y.dtype if isinstance(y, Tensor) else None + + if isinstance(x, Tensor) and x.ndim == 0: + x = x.item() + if isinstance(y, Tensor) and y.ndim == 0: + y = y.item() + + if isinstance(x, Tensor): + return Tensor.__native__truediv__(x, y) + elif isinstance(y, Tensor): + return Tensor.__native__rdiv__(y, x) + else: + result = x / y + if np.isscalar(result) and tensor_out: + return tensor(result, dtype=out_dtype) + else: + return result + +def rdiv(x, y): + if isinstance(y, Tensor) and y.ndim == 0: + y = y.item() + + return Tensor.__native__rdiv__(x, y) + + +Tensor.__truediv__ = divide +Tensor.__rdiv__ = rdiv + + +def __getstate__(self): + state = {"dtype": self.dtype, "value": self.numpy()} + return state + + +def __setstate__(self, newstate): + + loaded = paddle.to_tensor(newstate["value"], dtype=newstate["dtype"]) + self.set_value(loaded) + +Tensor.__getstate__ = __getstate__ +Tensor.__setstate__ = __setstate__ + +## requires_grad property + +def getter(x): + return not x.stop_gradient + +def setter(x, value): + x.stop_gradient = not value + +Tensor.requires_grad = property(getter, setter) + +Tensor.topk = topk \ No newline at end of file diff --git a/RE/paddleext/setup.py b/RE/paddleext/setup.py new file mode 100644 index 00000000..d0e29ea7 --- 
/dev/null +++ b/RE/paddleext/setup.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +################################################################################ +# +# Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved +# +################################################################################ +""" +Setup script. + +Authors: sunmingming01(sunmingming01@baidu.com) +Date: 2020/12/31 12:33:34 +""" + +from setuptools import setup, find_packages + +with open('README.md') as readme_file: + README = readme_file.read() + +setup_args = dict( + name='paddle-extension', + version='1.0.0-beta', + description='Paddle extensions, including implementation for torch apis.', + long_description_content_type="text/markdown", + long_description=README, + license='Apache', + packages=find_packages(include=["paddleext", "paddleext.*"]), + author='Mingming Sun', + author_email='sunmingming01@baidu.com', + keywords=['Deep Learning', 'Paddlepaddle'], + url='', + download_url='' +) + +install_requires = [ +] + +if __name__ == '__main__': + setup(**setup_args, install_requires=install_requires) \ No newline at end of file diff --git a/RE/paddleext/test/__init__.py b/RE/paddleext/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddleext/test/test_diagonal.py b/RE/paddleext/test/test_diagonal.py new file mode 100644 index 00000000..80714ed1 --- /dev/null +++ b/RE/paddleext/test/test_diagonal.py @@ -0,0 +1,32 @@ + +import numpy as np + +from chaos import paddle_ as paddle, torch_ as torch + + +def test_diagonal(): + import random + + for rank in range(2, 6): + for test in range(10): + while True: + dim1 = random.randint(0, rank - 1) + dim2 = random.randint(0, rank - 1) + if dim1 != dim2: + break + + shape = [random.randint(5, 10) for _ in range(rank)] + offset = random.randint(-shape[dim1] + 1, shape[dim2]) + + x = np.random.rand(*shape) + + torch_input = torch.from_numpy(x) + torch.fill_diagonal(torch_input, value=100, offset=offset, dim1=dim1, dim2=dim2) + + paddle_input = paddle.from_numpy(x) + paddle.fill_diagonal(paddle_input, value=100, offset=offset, dim1=dim1, dim2=dim2) + + paddle_out = paddle_input.numpy() + torch_out = torch_input.numpy() + + assert np.sum(np.abs(paddle_out - torch_out)) < 1e-5 \ No newline at end of file diff --git a/RE/paddleext/test/test_function.py b/RE/paddleext/test/test_function.py new file mode 100644 index 00000000..db676d1b --- /dev/null +++ b/RE/paddleext/test/test_function.py @@ -0,0 +1,13 @@ + + + +from chaos.backend_.paddle_.functional import fill_diagonal +import paddle + +def test_fill_diagnonal(): + + a = paddle.randn((5, 5)) + fill_diagonal(a, float("-inf")) + +if __name__ == "__main__": + test_fill_diagnonal() \ No newline at end of file diff --git a/RE/paddleext/test/test_pad.py b/RE/paddleext/test/test_pad.py new file mode 100644 index 00000000..6fb0e2bc --- /dev/null +++ b/RE/paddleext/test/test_pad.py @@ -0,0 +1,29 @@ + +import numpy as np + +from chaos.backend_ import paddle_ as paddle, torch_ as torch + + +def test_pad(): + import random + + for ndim in range(2, 6): + for test in range(5): + shape = [random.randint(5, 10) for _ in range(ndim)] + x = np.random.rand(*shape) + + torch_input = torch.from_numpy(x) + paddle_input = paddle.from_numpy(x) + + for rank in range(1, ndim + 1): + + pad = [random.randint(0, 10) for _ in range(rank)] + [random.randint(0, 10) for _ in range(rank)] + + torch_output = torch.nn.functional.pad(torch_input, pad, mode='constant', value=0.0) + + paddle_output = 
paddle.nn.functional.pad(paddle_input, pad, mode='constant', value=0.0) + + paddle_out = paddle_output.numpy() + torch_out = torch_output.numpy() + + assert np.allclose(paddle_out, torch_out) \ No newline at end of file diff --git a/RE/paddleext/test/test_scatter.py b/RE/paddleext/test/test_scatter.py new file mode 100644 index 00000000..52c2d29a --- /dev/null +++ b/RE/paddleext/test/test_scatter.py @@ -0,0 +1,146 @@ +from chaos.backend_ import paddle_ as paddle, torch_ as torch +import numpy as np + +def test_scatter_1d(): + + x = np.random.rand(100) + + indices = np.random.randint(low=0, high=100, size=50) + updates = np.random.rand(50) + + paddle_out = paddle.scatter(paddle.from_numpy(x), 0, paddle.from_numpy(indices), paddle.from_numpy(updates)) + torch_out = torch.scatter(torch.from_numpy(x), 0, torch.from_numpy(indices), torch.from_numpy(updates)) + + paddle_out = paddle_out.numpy() + torch_out = torch_out.numpy() + + assert np.all(paddle_out == torch_out) + + +def test_scatter_2d_dim0(): + + dim0 = 101 + dim1 = 31 + x = np.random.rand(dim0, dim1) + + # for dim = 0 + + import random + + indices = list(range(dim0)) + random.shuffle(indices) + indices = np.array(indices[:50]).reshape((25, 2)) + updates = np.random.rand(indices.shape[0], 2) + + torch_out = torch.scatter(torch.from_numpy(x), 0, torch.from_numpy(indices), torch.from_numpy(updates)) + paddle_out = paddle.scatter(paddle.from_numpy(x), 0, paddle.from_numpy(indices), paddle.from_numpy(updates)) + + paddle_out = paddle_out.numpy() + torch_out = torch_out.numpy() + + assert np.allclose(paddle_out, torch_out) + + +def test_scatter_2d_dim1(): + + dim0 = 101 + dim1 = 131 + x = np.random.rand(dim0, dim1) + + # for dim = 0 + + import random + + indices = list(range(dim1)) + random.shuffle(indices) + indices = np.array(indices[:50]).reshape((25, 2)) + updates = np.random.rand(indices.shape[0], 2) + + torch_out = torch.scatter(torch.from_numpy(x), 1, torch.from_numpy(indices), torch.from_numpy(updates)) + paddle_out = paddle.scatter(paddle.from_numpy(x), 1, paddle.from_numpy(indices), paddle.from_numpy(updates)) + + paddle_out = paddle_out.numpy() + torch_out = torch_out.numpy() + + assert np.allclose(paddle_out, torch_out) + + +def test_scatter_nd_dimm(): + import random, math + + for rank in range(1, 6): + for test in range(10): + dim = random.randint(0, rank-1) + + shape = [random.randint(5, 10) for _ in range(rank)] + + indice_shape = [random.randint(5, 10) for _ in range(rank)] + indice_shape = [min(shape[i], indice_shape[i]) for i in range(rank)] + indice_numel = math.prod(indice_shape) + + shape[dim] = 2 * indice_numel + + x = np.random.rand(*shape) + + indice_value = list(range(shape[dim])) + random.shuffle(indice_value) + + indices = np.array(indice_value[:indice_numel]).reshape(indice_shape) + updates = np.random.rand(*indice_shape) + + torch_out = torch.scatter(torch.from_numpy(x), dim, torch.from_numpy(indices), torch.from_numpy(updates)) + paddle_out = paddle.scatter(paddle.from_numpy(x), dim, paddle.from_numpy(indices), paddle.from_numpy(updates)) + + paddle_out = paddle_out.numpy() + torch_out = torch_out.numpy() + + assert np.allclose(paddle_out, torch_out) + +def test_scatter_add_1d(): + + x = np.random.rand(100) + + indices = np.random.randint(low=0, high=100, size=50) + updates = np.random.rand(50) + + paddle_out = paddle.scatter_add(paddle.from_numpy(x), 0, paddle.from_numpy(indices), paddle.from_numpy(updates)) + torch_out = torch.scatter_add(torch.from_numpy(x), 0, torch.from_numpy(indices), 
torch.from_numpy(updates))
+
+    paddle_out = paddle_out.numpy()
+    torch_out = torch_out.numpy()
+
+    assert np.all(paddle_out == torch_out)
+
+def test_scatter_add_nd_dimm():
+    import random, math
+
+    for rank in range(1, 6):
+        for test in range(10):
+            dim = random.randint(0, rank-1)
+
+            shape = [random.randint(5, 10) for _ in range(rank)]
+
+            indice_shape = [random.randint(5, 10) for _ in range(rank)]
+            indice_shape = [min(shape[i], indice_shape[i]) for i in range(rank)]
+            indice_numel = math.prod(indice_shape)
+
+            shape[dim] = 2 * indice_numel
+
+            x = np.random.rand(*shape)
+
+
+            indice_value = list(range(shape[dim]))
+            random.shuffle(indice_value)
+
+            indices = np.array(indice_value[:indice_numel]).reshape(indice_shape)
+
+            # indices = np.random.randint(0, shape[dim], size=indice_shape)
+            updates = np.random.rand(*indice_shape)
+
+            torch_out = torch.scatter_add(torch.from_numpy(x), dim, torch.from_numpy(indices), torch.from_numpy(updates))
+            paddle_out = paddle.scatter_add(paddle.from_numpy(x), dim, paddle.from_numpy(indices), paddle.from_numpy(updates))
+
+            paddle_out = paddle_out.numpy()
+            torch_out = torch_out.numpy()
+
+            assert np.allclose(paddle_out, torch_out)
\ No newline at end of file
diff --git a/RE/paddlemetric/.gitignore b/RE/paddlemetric/.gitignore
new file mode 100644
index 00000000..2f78cf5b
--- /dev/null
+++ b/RE/paddlemetric/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+
diff --git a/RE/paddlemetric/.ignore b/RE/paddlemetric/.ignore
new file mode 100644
index 00000000..0d20b648
--- /dev/null
+++ b/RE/paddlemetric/.ignore
@@ -0,0 +1 @@
+*.pyc
diff --git a/RE/paddlemetric/CHANGELOG.md b/RE/paddlemetric/CHANGELOG.md
new file mode 100644
index 00000000..a843ec77
--- /dev/null
+++ b/RE/paddlemetric/CHANGELOG.md
@@ -0,0 +1,20 @@
+Changelog
+===
+All notable changes to this project are recorded here. The format is based on [Keep a Changelog].
+
+This project's versioning follows [Semantic Versioning] and [PEP-440].
+
+## [v1.0] - 2022-07-04
+---
+### Added
+- Support the testing of some classification modules
+### Changed
+
+
+
+
+[v1.0]: https://console.cloud.baidu-int.com/devops/icode/repos/baidu/ccl/torch2paddle/commits/7476c4f8477d6161f8d5aaaf78f47d6bee990d42
+
+[Keep a Changelog]: https://keepachangelog.com/zh-CN/1.0.0/
+[Semantic Versioning]: https://semver.org/lang/zh-CN/
+[PEP-440]: https://www.python.org/dev/peps/pep-0440/
diff --git a/RE/paddlemetric/src/README.md b/RE/paddlemetric/src/README.md
new file mode 100644
index 00000000..1b8004fc
--- /dev/null
+++ b/RE/paddlemetric/src/README.md
@@ -0,0 +1,28 @@
+# Paddle Metrics
+
+Metrics library for paddle, ported from torchmetrics
+## Install
+
+pip install http://public.bcc-bdbl.baidu.com:8000/Package/paddlemetrics-1.0.0b0-py3-none-any.whl
+
+## Document
+
+### Requirements
+
+* paddle-extension (the paddleext package in this repo)
+
+### Progress
+
+Testing progress:
+
+### Classification
+
+* classification/test_accuracy.py
+* classification/test_f_beta.py
+* classification/test_precision_recall.py
+* classification/test_stat_scores.py
+
+### functional
+
+* functional/test_classification.py
+
diff --git a/RE/paddlemetric/src/paddlemetrics/__about__.py b/RE/paddlemetric/src/paddlemetrics/__about__.py
new file mode 100644
index 00000000..53a9cfa4
--- /dev/null
+++ b/RE/paddlemetric/src/paddlemetrics/__about__.py
@@ -0,0 +1,27 @@
+__version__ = "0.6.0dev"
+__author__ = "PyTorchLightning et al."
+__author_email__ = "name@pytorchlightning.ai"
+__license__ = "Apache-2.0"
+__copyright__ = f"Copyright (c) 2020-2021, {__author__}."
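# Editor's note (illustrative sketch, not part of the original patch): typical
# usage mirrors torchmetrics, with paddleext.torchapi standing in for torch:
#
#     import paddleext.torchapi as B
#     from paddlemetrics import Accuracy
#
#     metric = Accuracy()
#     preds = B.tensor([0, 2, 1, 3])
#     target = B.tensor([0, 1, 2, 3])
#     print(metric(preds, target))   # expected accuracy: 0.5 (2 of 4 correct)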
+__homepage__ = "https://github.com/PyTorchLightning/metrics" +__docs__ = "PyTorch native Metrics" +__docs_url__ = "https://paddlemetrics.readthedocs.io/en/stable/" +__long_doc__ = """ +paddlemetrics is a metrics API created for easy metric development and usage in both PyTorch and +[PyTorch Lightning](https://pytorch-lightning.readthedocs.io/en/stable/). It was originally a part of +Pytorch Lightning, but got split off so users could take advantage of the large collection of metrics +implemented without having to install Pytorch Lightning (even though we would love for you to try it out). +We currently have around 25+ metrics implemented and we continuously is adding more metrics, both within +already covered domains (classification, regression ect.) but also new domains (object detection ect.). +We make sure that all our metrics are rigorously tested such that you can trust them. +""" + +__all__ = [ + "__author__", + "__author_email__", + "__copyright__", + "__docs__", + "__homepage__", + "__license__", + "__version__", +] diff --git a/RE/paddlemetric/src/paddlemetrics/__init__.py b/RE/paddlemetric/src/paddlemetrics/__init__.py new file mode 100644 index 00000000..ea557086 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/__init__.py @@ -0,0 +1,143 @@ +r"""Root package info.""" +import logging as __logging +import os +import sys + +from paddlemetrics.__about__ import * # noqa: F401, F403 + +_logger = __logging.getLogger("paddlemetrics") +_logger.addHandler(__logging.StreamHandler()) +_logger.setLevel(__logging.INFO) + +_PACKAGE_ROOT = os.path.dirname(__file__) +_PROJECT_ROOT = os.path.dirname(_PACKAGE_ROOT) + +from paddlemetrics import functional # noqa: E402 +from paddlemetrics.aggregation import CatMetric, MaxMetric, MeanMetric, MinMetric, SumMetric # noqa: E402 +from paddlemetrics.audio import PESQ, PIT, SI_SDR, SI_SNR, SNR, STOI # noqa: E402 +from paddlemetrics.classification import ( # noqa: E402 + AUC, + AUROC, + F1, + ROC, + Accuracy, + AveragePrecision, + BinnedAveragePrecision, + BinnedPrecisionRecallCurve, + BinnedRecallAtFixedPrecision, + CalibrationError, + CohenKappa, + ConfusionMatrix, + FBeta, + HammingDistance, + Hinge, + IoU, + KLDivergence, + MatthewsCorrcoef, + Precision, + PrecisionRecallCurve, + Recall, + Specificity, + StatScores, +) +from paddlemetrics.collections import MetricCollection # noqa: E402 +#from paddlemetrics.image import FID, IS, KID, LPIPS, PSNR, SSIM # noqa: E402 +from paddlemetrics.metric import Metric # noqa: E402 +from paddlemetrics.regression import ( # noqa: E402 + CosineSimilarity, + ExplainedVariance, + MeanAbsoluteError, + MeanAbsolutePercentageError, + MeanSquaredError, + MeanSquaredLogError, + PearsonCorrcoef, + R2Score, + SpearmanCorrcoef, + SymmetricMeanAbsolutePercentageError, + TweedieDevianceScore, +) +from paddlemetrics.retrieval import ( # noqa: E402 + RetrievalFallOut, + RetrievalHitRate, + RetrievalMAP, + RetrievalMRR, + RetrievalNormalizedDCG, + RetrievalPrecision, + RetrievalRecall, + RetrievalRPrecision, +) +from paddlemetrics.text import WER, BLEUScore, ROUGEScore, SacreBLEUScore # noqa: E402 BERTScore, +from paddlemetrics.wrappers import BootStrapper, MetricTracker, MultioutputWrapper # noqa: E402 + +__all__ = [ + "functional", + "Accuracy", + "AUC", + "AUROC", + "AveragePrecision", + "BinnedAveragePrecision", + "BinnedPrecisionRecallCurve", + "BinnedRecallAtFixedPrecision", +# "BERTScore", + "BLEUScore", + "BootStrapper", + "CalibrationError", + "CatMetric", + "CohenKappa", + "ConfusionMatrix", + "CosineSimilarity", + 
"TweedieDevianceScore", + "ExplainedVariance", + "F1", + "FBeta", +# "FID", + "HammingDistance", + "Hinge", + "IoU", +# "IS", +# "KID", + "KLDivergence", +# "LPIPS", + "MatthewsCorrcoef", + "MaxMetric", + "MeanAbsoluteError", + "MeanAbsolutePercentageError", + "MeanMetric", + "MeanSquaredError", + "MeanSquaredLogError", + "Metric", + "MetricCollection", + "MetricTracker", + "MinMetric", + "MultioutputWrapper", + "PearsonCorrcoef", + "PESQ", + "PIT", + "Precision", + "PrecisionRecallCurve", +# "PSNR", + "R2Score", + "Recall", + "RetrievalFallOut", + "RetrievalHitRate", + "RetrievalMAP", + "RetrievalMRR", + "RetrievalNormalizedDCG", + "RetrievalPrecision", + "RetrievalRecall", + "RetrievalRPrecision", + "ROC", + "ROUGEScore", + "SacreBLEUScore", + "SI_SDR", + "SI_SNR", + "SNR", + "SpearmanCorrcoef", + "Specificity", +# "SSIM", + "StatScores", + "STOI", + "SumMetric", + "SymmetricMeanAbsolutePercentageError", + "WER", +] diff --git a/RE/paddlemetric/src/paddlemetrics/aggregation.py b/RE/paddlemetric/src/paddlemetrics/aggregation.py new file mode 100644 index 00000000..a95c51c0 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/aggregation.py @@ -0,0 +1,445 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import warnings +from typing import Any, Callable, List, Optional, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.metric import Metric +from paddlemetrics.utilities.data import dim_zero_cat + + +class BaseAggregator(Metric): + """Base class for aggregation metrics. + + Args: + fn: string specifying the reduction function + default_value: default tensor value to use for the metric state + nan_strategy: options: + - ``'error'``: if any `nan` values are encounted will give a RuntimeError + - ``'warn'``: if any `nan` values are encounted will give a warning and continue + - ``'ignore'``: all `nan` values are silently removed + - a float: if a float is provided will impude any `nan` values with this value + + compute_on_step: + Forward only calls ``update()`` and returns None if this is + set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. + default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. + When `None`, DDP will be used to perform the allgather. 
+ + Raises: + ValueError: + If ``nan_strategy`` is not one of ``error``, ``warn``, ``ignore`` or a float + """ + + value: Tensor + is_differentiable = None + higher_is_better = None + + def __init__( + self, + fn: Union[Callable, str], + default_value: Union[Tensor, List], + nan_strategy: Union[str, float] = "error", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ): + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + allowed_nan_strategy = ("error", "warn", "ignore") + if nan_strategy not in allowed_nan_strategy and not isinstance(nan_strategy, float): + raise ValueError( + f"Arg `nan_strategy` should either be a float or one of {allowed_nan_strategy}" + f" but got {nan_strategy}." + ) + + self.nan_strategy = nan_strategy + self.add_state("value", default=default_value, dist_reduce_fx=fn) + + def _cast_and_nan_check_input(self, x: Union[float, Tensor]) -> Tensor: + """Converts input x to a tensor if not already and afterwards checks for nans that either give an error, + warning or just ignored.""" + if not isinstance(x, Tensor): + x = B.as_tensor(x, dtype=B.float32, device=self.device) + + nans = B.isnan(x) + if any(nans.flatten()): + if self.nan_strategy == "error": + raise RuntimeError("Encounted `nan` values in tensor") + if self.nan_strategy == "warn": + warnings.warn("Encounted `nan` values in tensor. Will be removed.", UserWarning) + x = x[~nans] + elif self.nan_strategy == "ignore": + x = x[~nans] + else: + x[nans] = self.nan_strategy + + return x.float() + + def update(self, value: Union[float, Tensor]) -> None: # type: ignore + """Overwrite in child class.""" + pass + + def compute(self) -> Tensor: + """Compute the aggregated value.""" + return self.value.squeeze() if isinstance(self.value, Tensor) else self.value + + +class MaxMetric(BaseAggregator): + """Aggregate a stream of value into their maximum value. + + Args: + nan_strategy: options: + - ``'error'``: if any `nan` values are encounted will give a RuntimeError + - ``'warn'``: if any `nan` values are encounted will give a warning and continue + - ``'ignore'``: all `nan` values are silently removed + - a float: if a float is provided will impude any `nan` values with this value + + compute_on_step: + Forward only calls ``update()`` and returns None if this is + set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. + default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. + When `None`, DDP will be used to perform the allgather. + + Raises: + ValueError: + If ``nan_strategy`` is not one of ``error``, ``warn``, ``ignore`` or a float + + Example: + >>> from paddlemetrics import MaxMetric + >>> metric = MaxMetric() + >>> metric.update(1) + >>> metric.update(B.tensor([2, 3])) + >>> metric.compute() + tensor(3.) 
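To make the ``nan_strategy`` options documented above concrete, a small sketch using ``SumMetric`` (defined later in this file); the input values are made up:

```python
# Illustration of the documented nan_strategy behaviours (hypothetical values).
import paddleext.torchapi as B
from paddlemetrics import SumMetric

x = B.tensor([1.0, float("nan"), 2.0])

m_ignore = SumMetric(nan_strategy="ignore")  # nan entries are silently dropped
m_ignore.update(x)
print(m_ignore.compute())                    # tensor(3.)

m_impute = SumMetric(nan_strategy=10.0)      # nan entries are imputed with 10.0
m_impute.update(x)
print(m_impute.compute())                    # tensor(13.)

# SumMetric(nan_strategy="error") would raise a RuntimeError on the same input.
```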
+ """ + + def __init__( + self, + nan_strategy: Union[str, float] = "warn", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ): + super().__init__( + "max", + -B.tensor(float("inf")), + nan_strategy, + compute_on_step, + dist_sync_on_step, + process_group, + dist_sync_fn, + ) + + def update(self, value: Union[float, Tensor]) -> None: # type: ignore + """Update state with data. + + Args: + value: Either a float or tensor containing data. Additional tensor + dimensions will be flattened + """ + value = self._cast_and_nan_check_input(value) + if any(value.flatten()): # make sure tensor not empty + self.value = B.max(self.value, B.max(value)) + + +class MinMetric(BaseAggregator): + """Aggregate a stream of value into their minimum value. + + Args: + nan_strategy: options: + - ``'error'``: if any `nan` values are encounted will give a RuntimeError + - ``'warn'``: if any `nan` values are encounted will give a warning and continue + - ``'ignore'``: all `nan` values are silently removed + - a float: if a float is provided will impude any `nan` values with this value + + compute_on_step: + Forward only calls ``update()`` and returns None if this is + set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. + default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. + When `None`, DDP will be used to perform the allgather. + + Raises: + ValueError: + If ``nan_strategy`` is not one of ``error``, ``warn``, ``ignore`` or a float + + Example: + >>> from paddlemetrics import MinMetric + >>> metric = MinMetric() + >>> metric.update(1) + >>> metric.update(B.tensor([2, 3])) + >>> metric.compute() + tensor(1.) + """ + + def __init__( + self, + nan_strategy: Union[str, float] = "warn", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ): + super().__init__( + "min", + B.tensor(float("inf")), + nan_strategy, + compute_on_step, + dist_sync_on_step, + process_group, + dist_sync_fn, + ) + + def update(self, value: Union[float, Tensor]) -> None: # type: ignore + """Update state with data. + + Args: + value: Either a float or tensor containing data. Additional tensor + dimensions will be flattened + """ + value = self._cast_and_nan_check_input(value) + if any(value.flatten()): # make sure tensor not empty + self.value = B.min(self.value, B.min(value)) + + +class SumMetric(BaseAggregator): + """Aggregate a stream of value into their sum. + + Args: + nan_strategy: options: + - ``'error'``: if any `nan` values are encounted will give a RuntimeError + - ``'warn'``: if any `nan` values are encounted will give a warning and continue + - ``'ignore'``: all `nan` values are silently removed + - a float: if a float is provided will impude any `nan` values with this value + + compute_on_step: + Forward only calls ``update()`` and returns None if this is + set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. 
+ default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. + When `None`, DDP will be used to perform the allgather. + + Raises: + ValueError: + If ``nan_strategy`` is not one of ``error``, ``warn``, ``ignore`` or a float + + Example: + >>> from paddlemetrics import SumMetric + >>> metric = SumMetric() + >>> metric.update(1) + >>> metric.update(B.tensor([2, 3])) + >>> metric.compute() + tensor(6.) + """ + + def __init__( + self, + nan_strategy: Union[str, float] = "warn", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ): + super().__init__( + "sum", B.zeros(1), nan_strategy, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn + ) + + def update(self, value: Union[float, Tensor]) -> None: # type: ignore + """Update state with data. + + Args: + value: Either a float or tensor containing data. Additional tensor + dimensions will be flattened + """ + value = self._cast_and_nan_check_input(value) + self.value += value.sum() + + +class CatMetric(BaseAggregator): + """Concatenate a stream of values. + + Args: + nan_strategy: options: + - ``'error'``: if any `nan` values are encounted will give a RuntimeError + - ``'warn'``: if any `nan` values are encounted will give a warning and continue + - ``'ignore'``: all `nan` values are silently removed + - a float: if a float is provided will impude any `nan` values with this value + + compute_on_step: + Forward only calls ``update()`` and returns None if this is + set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. + default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. + When `None`, DDP will be used to perform the allgather. + + Raises: + ValueError: + If ``nan_strategy`` is not one of ``error``, ``warn``, ``ignore`` or a float + + Example: + >>> from paddlemetrics import CatMetric + >>> metric = CatMetric() + >>> metric.update(1) + >>> metric.update(B.tensor([2, 3])) + >>> metric.compute() + tensor([1., 2., 3.]) + """ + + def __init__( + self, + nan_strategy: Union[str, float] = "warn", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ): + super().__init__("cat", [], nan_strategy, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn) + + def update(self, value: Union[float, Tensor]) -> None: # type: ignore + """Update state with data. + + Args: + value: Either a float or tensor containing data. Additional tensor + dimensions will be flattened + """ + value = self._cast_and_nan_check_input(value) + if any(value.flatten()): + self.value.append(value) + + def compute(self) -> Tensor: + """Compute the aggregated value.""" + if isinstance(self.value, list) and self.value: + return dim_zero_cat(self.value) + return self.value + + +class MeanMetric(BaseAggregator): + """Aggregate a stream of value into their mean value. 
+ + Args: + nan_strategy: options: + - ``'error'``: if any `nan` values are encounted will give a RuntimeError + - ``'warn'``: if any `nan` values are encounted will give a warning and continue + - ``'ignore'``: all `nan` values are silently removed + - a float: if a float is provided will impude any `nan` values with this value + + compute_on_step: + Forward only calls ``update()`` and returns None if this is + set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. + default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. + When `None`, DDP will be used to perform the allgather. + + Raises: + ValueError: + If ``nan_strategy`` is not one of ``error``, ``warn``, ``ignore`` or a float + + Example: + >>> from paddlemetrics import MeanMetric + >>> metric = MeanMetric() + >>> metric.update(1) + >>> metric.update(B.tensor([2, 3])) + >>> metric.compute() + tensor([2.]) + """ + + def __init__( + self, + nan_strategy: Union[str, float] = "warn", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ): + super().__init__( + "sum", B.zeros(1), nan_strategy, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn + ) + self.add_state("weight", default=B.zeros(1), dist_reduce_fx="sum") + + def update(self, value: Union[float, Tensor], weight: Union[float, Tensor] = 1.0) -> None: # type: ignore + """Update state with data. + + Args: + value: Either a float or tensor containing data. Additional tensor + dimensions will be flattened + weight: Either a float or tensor containing weights for calculating + the average. Shape of weight should be able to broadcast with + the shape of `value`. Default to `1.0` corresponding to simple + harmonic average. + """ + value = self._cast_and_nan_check_input(value) + weight = self._cast_and_nan_check_input(weight) + + # broadcast weight to values shape + if not hasattr(B, "broadcast_to"): + if weight.shape == (): + weight = B.ones_like(value) * weight + if weight.shape != value.shape: + raise ValueError("Broadcasting not supported on PyTorch <1.8") + else: + weight = B.broadcast_to(weight, value.shape) + + self.value += (value * weight).sum() + self.weight += weight.sum() + + def compute(self) -> Tensor: + """Compute the aggregated value.""" + return self.value / self.weight diff --git a/RE/paddlemetric/src/paddlemetrics/audio/__init__.py b/RE/paddlemetric/src/paddlemetrics/audio/__init__.py new file mode 100644 index 00000000..efd0b451 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/audio/__init__.py @@ -0,0 +1,19 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
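Before moving on to the audio metrics, a hand-checkable sketch of the weighted averaging that ``MeanMetric`` describes above (numbers are hypothetical):

```python
# Weighted mean as described above: sum(value * weight) / sum(weight).
import paddleext.torchapi as B
from paddlemetrics import MeanMetric

m = MeanMetric()
m.update(B.tensor([2.0, 4.0]), weight=B.tensor([1.0, 3.0]))
print(m.compute())  # (2*1 + 4*3) / (1 + 3) = tensor([3.5000])
```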
+from paddlemetrics.audio.pesq import PESQ # noqa: F401 +from paddlemetrics.audio.pit import PIT # noqa: F401 +from paddlemetrics.audio.si_sdr import SI_SDR # noqa: F401 +from paddlemetrics.audio.si_snr import SI_SNR # noqa: F401 +from paddlemetrics.audio.snr import SNR # noqa: F401 +from paddlemetrics.audio.stoi import STOI # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/audio/pesq.py b/RE/paddlemetric/src/paddlemetrics/audio/pesq.py new file mode 100644 index 00000000..d45fab53 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/audio/pesq.py @@ -0,0 +1,130 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.audio.pesq import pesq +from paddlemetrics.metric import Metric +from paddlemetrics.utilities.imports import _PESQ_AVAILABLE + + +class PESQ(Metric): + """PESQ (Perceptual Evaluation of Speech Quality) + + This is a wrapper for the pesq package [1]. . Note that input will be moved to `cpu` + to perform the metric calculation. + + .. note:: using this metrics requires you to have ``pesq`` install. Either install as ``pip install + paddlemetrics[audio]`` or ``pip install pesq`` + + Forward accepts + + - ``preds``: ``shape [...,time]`` + - ``target``: ``shape [...,time]`` + + Args: + fs: + sampling frequency, should be 16000 or 8000 (Hz) + mode: + 'wb' (wide-band) or 'nb' (narrow-band) + keep_same_device: + whether to move the pesq value to the device of preds + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. 
When ``None``, DDP + will be used to perform the allgather + + Raises: + ValueError: + If ``peqs`` package is not installed + ValueError: + If ``fs`` is not either ``8000`` or ``16000`` + ValueError: + If ``mode`` is not either ``"wb"`` or ``"nb"`` + + Example: + >>> from paddlemetrics.audio import PESQ + >>> import torchapi as B + >>> g = B.manual_seed(1) + >>> preds = B.randn(8000) + >>> target = B.randn(8000) + >>> nb_pesq = PESQ(8000, 'nb') + >>> nb_pesq(preds, target) + tensor(2.2076) + >>> wb_pesq = PESQ(16000, 'wb') + >>> wb_pesq(preds, target) + tensor(1.7359) + + References: + [1] https://github.com/ludlows/python-pesq + """ + + sum_pesq: Tensor + total: Tensor + is_differentiable = False + higher_is_better = True + + def __init__( + self, + fs: int, + mode: str, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable[[Tensor], Tensor]] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + if not _PESQ_AVAILABLE: + raise ValueError( + "PESQ metric requires that pesq is installed." + "Either install as `pip install paddlemetrics[audio]` or `pip install pesq`" + ) + if fs not in (8000, 16000): + raise ValueError(f"Expected argument `fs` to either be 8000 or 16000 but got {fs}") + self.fs = fs + if mode not in ("wb", "nb"): + raise ValueError(f"Expected argument `mode` to either be 'wb' or 'nb' but got {mode}") + self.mode = mode + + self.add_state("sum_pesq", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + pesq_batch = pesq(preds, target, self.fs, self.mode, False).to(self.sum_pesq.device) + + self.sum_pesq += pesq_batch.sum() + self.total += pesq_batch.numel() + + def compute(self) -> Tensor: + """Computes average PESQ.""" + return self.sum_pesq / self.total diff --git a/RE/paddlemetric/src/paddlemetrics/audio/pit.py b/RE/paddlemetric/src/paddlemetrics/audio/pit.py new file mode 100644 index 00000000..9d9dc757 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/audio/pit.py @@ -0,0 +1,113 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Dict, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.audio.pit import pit +from paddlemetrics.metric import Metric + + +class PIT(Metric): + """Permutation invariant training (PIT). The PIT implements the famous Permutation Invariant Training method. + + [1] in speech separation field in order to calculate audio metrics in a permutation invariant way. 
+ + Forward accepts + + - ``preds``: ``shape [batch, spk, ...]`` + - ``target``: ``shape [batch, spk, ...]`` + + Args: + metric_func: + a metric function accept a batch of target and estimate, i.e. metric_func(preds[:, i, ...], + target[:, j, ...]), and returns a batch of metric tensors [batch] + eval_func: + the function to find the best permutation, can be 'min' or 'max', i.e. the smaller the better + or the larger the better. + compute_on_step: + Forward only calls ``update()`` and returns None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. + kwargs: + additional args for metric_func + + Returns: + average PIT metric + + Example: + >>> import torchapi as B + >>> from paddlemetrics import PIT + >>> from paddlemetrics.functional import si_snr + >>> _ = B.manual_seed(42) + >>> preds = B.randn(3, 2, 5) # [batch, spk, time] + >>> target = B.randn(3, 2, 5) # [batch, spk, time] + >>> pit = PIT(si_snr, 'max') + >>> pit(preds, target) + tensor(-2.1065) + + Reference: + [1] D. Yu, M. Kolbaek, Z.-H. Tan, J. Jensen, Permutation invariant training of deep models for + speaker-independent multi-talker speech separation, in: 2017 IEEE Int. Conf. Acoust. Speech + Signal Process. ICASSP, IEEE, New Orleans, LA, 2017: pp. 241–245. https://doi.org/10.1109/ICASSP.2017.7952154. + """ + + is_differentiable = True + sum_pit_metric: Tensor + total: Tensor + + def __init__( + self, + metric_func: Callable, + eval_func: str = "max", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable[[Tensor], Tensor]] = None, + **kwargs: Dict[str, Any], + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + self.metric_func = metric_func + self.eval_func = eval_func + self.kwargs = kwargs + + self.add_state("sum_pit_metric", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + pit_metric = pit(preds, target, self.metric_func, self.eval_func, **self.kwargs)[0] + + self.sum_pit_metric += pit_metric.sum() + self.total += pit_metric.numel() + + def compute(self) -> Tensor: + """Computes average PIT metric.""" + return self.sum_pit_metric / self.total diff --git a/RE/paddlemetric/src/paddlemetrics/audio/si_sdr.py b/RE/paddlemetric/src/paddlemetrics/audio/si_sdr.py new file mode 100644 index 00000000..f6a46378 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/audio/si_sdr.py @@ -0,0 +1,105 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
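Complementing the ``PIT`` description above, a sketch of plugging in a custom ``metric_func``; the ``neg_mse`` helper below is hypothetical and only illustrates the required ``[batch]``-shaped return value:

```python
# PIT with a hypothetical pairwise metric; metric_func receives preds[:, i, ...]
# and target[:, j, ...] and must return one score per batch element.
import paddleext.torchapi as B
from paddlemetrics import PIT


def neg_mse(preds, target):
    return -((preds - target) ** 2).mean(-1)  # shape [batch]


_ = B.manual_seed(42)
preds = B.randn(3, 2, 5)   # [batch, spk, time]
target = B.randn(3, 2, 5)  # [batch, spk, time]
pit = PIT(neg_mse, "max")  # "max": the least negative MSE permutation wins
print(pit(preds, target))
```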
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.audio.si_sdr import si_sdr +from paddlemetrics.metric import Metric + + +class SI_SDR(Metric): + """Scale-invariant signal-to-distortion ratio (SI-SDR). The SI-SDR value is in general considered an overall + measure of how good a source sound. + + Forward accepts + + - ``preds``: ``shape [...,time]`` + - ``target``: ``shape [...,time]`` + + Args: + zero_mean: + if to zero mean target and preds or not + compute_on_step: + Forward only calls ``update()`` and returns None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. + + Raises: + TypeError: + if target and preds have a different shape + + Returns: + average si-sdr value + + Example: + >>> import torchapi as B + >>> from paddlemetrics import SI_SDR + >>> target = B.tensor([3.0, -0.5, 2.0, 7.0]) + >>> preds = B.tensor([2.5, 0.0, 2.0, 8.0]) + >>> si_sdr = SI_SDR() + >>> si_sdr_val = si_sdr(preds, target) + >>> si_sdr_val + tensor(18.4030) + + References: + [1] Le Roux, Jonathan, et al. "SDR half-baked or well done." IEEE International Conference on Acoustics, Speech + and Signal Processing (ICASSP) 2019. + """ + + is_differentiable = True + higher_is_better = True + sum_si_sdr: Tensor + total: Tensor + + def __init__( + self, + zero_mean: bool = False, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable[[Tensor], Tensor]] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + self.zero_mean = zero_mean + + self.add_state("sum_si_sdr", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + si_sdr_batch = si_sdr(preds=preds, target=target, zero_mean=self.zero_mean) + + self.sum_si_sdr += si_sdr_batch.sum() + self.total += si_sdr_batch.numel() + + def compute(self) -> Tensor: + """Computes average SI-SDR.""" + return self.sum_si_sdr / self.total diff --git a/RE/paddlemetric/src/paddlemetrics/audio/si_snr.py b/RE/paddlemetric/src/paddlemetrics/audio/si_snr.py new file mode 100644 index 00000000..31747a28 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/audio/si_snr.py @@ -0,0 +1,101 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.audio.si_snr import si_snr +from paddlemetrics.metric import Metric + + +class SI_SNR(Metric): + """Scale-invariant signal-to-noise ratio (SI-SNR). + + Forward accepts + + - ``preds``: ``shape [...,time]`` + - ``target``: ``shape [...,time]`` + + Args: + compute_on_step: + Forward only calls ``update()`` and returns None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. + + Raises: + TypeError: + if target and preds have a different shape + + Returns: + average si-snr value + + Example: + >>> import torchapi as B + >>> from paddlemetrics import SI_SNR + >>> target = B.tensor([3.0, -0.5, 2.0, 7.0]) + >>> preds = B.tensor([2.5, 0.0, 2.0, 8.0]) + >>> si_snr = SI_SNR() + >>> si_snr_val = si_snr(preds, target) + >>> si_snr_val + tensor(15.0918) + + References: + [1] Y. Luo and N. Mesgarani, "TaSNet: Time-Domain Audio Separation Network for Real-Time, Single-Channel Speech + Separation," 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2018, pp. + 696-700, doi: 10.1109/ICASSP.2018.8462116. + """ + + is_differentiable = True + sum_si_snr: Tensor + total: Tensor + higher_is_better = True + + def __init__( + self, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable[[Tensor], Tensor]] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.add_state("sum_si_snr", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + si_snr_batch = si_snr(preds=preds, target=target) + + self.sum_si_snr += si_snr_batch.sum() + self.total += si_snr_batch.numel() + + def compute(self) -> Tensor: + """Computes average SI-SNR.""" + return self.sum_si_snr / self.total diff --git a/RE/paddlemetric/src/paddlemetrics/audio/snr.py b/RE/paddlemetric/src/paddlemetrics/audio/snr.py new file mode 100644 index 00000000..683cb8bf --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/audio/snr.py @@ -0,0 +1,110 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.audio.snr import snr +from paddlemetrics.metric import Metric + + +class SNR(Metric): + r"""Signal-to-noise ratio (SNR_): + + .. math:: + \text{SNR} = \frac{P_{signal}}{P_{noise}} + + where :math:`P` denotes the power of each signal. The SNR metric compares the level + of the desired signal to the level of background noise. Therefore, a high value of + SNR means that the audio is clear. + + Forward accepts + + - ``preds``: ``shape [..., time]`` + - ``target``: ``shape [..., time]`` + + Args: + zero_mean: + if to zero mean target and preds or not + compute_on_step: + Forward only calls ``update()`` and returns None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. + + Raises: + TypeError: + if target and preds have a different shape + + Returns: + average snr value + + Example: + >>> import torchapi as B + >>> from paddlemetrics import SNR + >>> target = B.tensor([3.0, -0.5, 2.0, 7.0]) + >>> preds = B.tensor([2.5, 0.0, 2.0, 8.0]) + >>> snr = SNR() + >>> snr_val = snr(preds, target) + >>> snr_val + tensor(16.1805) + + References: + [1] Le Roux, Jonathan, et al. "SDR half-baked or well done." IEEE International Conference on Acoustics, Speech + and Signal Processing (ICASSP) 2019. + + """ + is_differentiable = True + sum_snr: Tensor + total: Tensor + + def __init__( + self, + zero_mean: bool = False, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable[[Tensor], Tensor]] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + self.zero_mean = zero_mean + + self.add_state("sum_snr", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + snr_batch = snr(preds=preds, target=target, zero_mean=self.zero_mean) + + self.sum_snr += snr_batch.sum() + self.total += snr_batch.numel() + + def compute(self) -> Tensor: + """Computes average SNR.""" + return self.sum_snr / self.total diff --git a/RE/paddlemetric/src/paddlemetrics/audio/stoi.py b/RE/paddlemetric/src/paddlemetrics/audio/stoi.py new file mode 100644 index 00000000..1c8cf378 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/audio/stoi.py @@ -0,0 +1,133 @@ +# Copyright The PyTorch Lightning team. 
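As a sanity check on the SNR definition above, the docstring example value can be reproduced by hand, assuming the underlying functional reports the ratio in decibels:

```python
# Hand computation of the SNR docstring example; the 10*log10 (decibel) form is
# an assumption about the underlying functional implementation.
import numpy as np

target = np.array([3.0, -0.5, 2.0, 7.0])
preds = np.array([2.5, 0.0, 2.0, 8.0])
noise = preds - target

snr_db = 10 * np.log10((target ** 2).sum() / (noise ** 2).sum())
print(round(float(snr_db), 4))  # 16.1805, matching the example above
```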
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.audio.stoi import stoi +from paddlemetrics.metric import Metric +from paddlemetrics.utilities.imports import _PYSTOI_AVAILABLE + + +class STOI(Metric): + r"""STOI (Short Term Objective Intelligibility, see [2,3]), a wrapper for the pystoi package [1]. + Note that input will be moved to `cpu` to perform the metric calculation. + + Intelligibility measure which is highly correlated with the intelligibility of degraded speech signals, e.g., due + to additive noise, single/multi-channel noise reduction, binary masking and vocoded speech as in CI simulations. + The STOI-measure is intrusive, i.e., a function of the clean and degraded speech signals. STOI may be a good + alternative to the speech intelligibility index (SII) or the speech transmission index (STI), when you are + interested in the effect of nonlinear processing to noisy speech, e.g., noise reduction, binary masking algorithms, + on speech intelligibility. Description taken from [Cees Taal's website](http://www.ceestaal.nl/code/). + + .. note:: using this metrics requires you to have ``pystoi`` install. Either install as ``pip install + paddlemetrics[audio]`` or ``pip install pystoi`` + + Forward accepts + + - ``preds``: ``shape [...,time]`` + - ``target``: ``shape [...,time]`` + + Args: + fs: + sampling frequency (Hz) + extended: + whether to use the extended STOI described in [4] + compute_on_step: + Forward only calls ``update()`` and returns None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. + + Returns: + average STOI value + + Raises: + ModuleNotFoundError: + If ``pystoi`` package is not installed + + Example: + >>> from paddlemetrics.audio import STOI + >>> import torchapi as B + >>> g = B.manual_seed(1) + >>> preds = B.randn(8000) + >>> target = B.randn(8000) + >>> stoi = STOI(8000, False) + >>> stoi(preds, target) + tensor(-0.0100) + + References: + [1] https://github.com/mpariente/pystoi + + [2] C.H.Taal, R.C.Hendriks, R.Heusdens, J.Jensen 'A Short-Time Objective Intelligibility Measure for + Time-Frequency Weighted Noisy Speech', ICASSP 2010, Texas, Dallas. + + [3] C.H.Taal, R.C.Hendriks, R.Heusdens, J.Jensen 'An Algorithm for Intelligibility Prediction of + Time-Frequency Weighted Noisy Speech', IEEE Transactions on Audio, Speech, and Language Processing, 2011. + + [4] J. Jensen and C. H. Taal, 'An Algorithm for Predicting the Intelligibility of Speech Masked by Modulated + Noise Maskers', IEEE Transactions on Audio, Speech and Language Processing, 2016. 
+ + """ + sum_stoi: Tensor + total: Tensor + is_differentiable = False + higher_is_better = True + + def __init__( + self, + fs: int, + extended: bool = False, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable[[Tensor], Tensor]] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + if not _PYSTOI_AVAILABLE: + raise ModuleNotFoundError( + "STOI metric requires that pystoi is installed." + " Either install as `pip install paddlemetrics[audio]` or `pip install pystoi`" + ) + self.fs = fs + self.extended = extended + + self.add_state("sum_stoi", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + stoi_batch = stoi(preds, target, self.fs, self.extended, False).to(self.sum_stoi.device) + + self.sum_stoi += stoi_batch.sum() + self.total += stoi_batch.numel() + + def compute(self) -> Tensor: + """Computes average STOI.""" + return self.sum_stoi / self.total diff --git a/RE/paddlemetric/src/paddlemetrics/classification/__init__.py b/RE/paddlemetric/src/paddlemetrics/classification/__init__.py new file mode 100644 index 00000000..e928018b --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/__init__.py @@ -0,0 +1,34 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from paddlemetrics.classification.accuracy import Accuracy # noqa: F401 +from paddlemetrics.classification.auc import AUC # noqa: F401 +from paddlemetrics.classification.auroc import AUROC # noqa: F401 +from paddlemetrics.classification.average_precision import AveragePrecision # noqa: F401 +from paddlemetrics.classification.binned_precision_recall import BinnedAveragePrecision # noqa: F401 +from paddlemetrics.classification.binned_precision_recall import BinnedPrecisionRecallCurve # noqa: F401 +from paddlemetrics.classification.binned_precision_recall import BinnedRecallAtFixedPrecision # noqa: F401 +from paddlemetrics.classification.calibration_error import CalibrationError # noqa: F401 +from paddlemetrics.classification.cohen_kappa import CohenKappa # noqa: F401 +from paddlemetrics.classification.confusion_matrix import ConfusionMatrix # noqa: F401 +from paddlemetrics.classification.f_beta import F1, FBeta # noqa: F401 +from paddlemetrics.classification.hamming_distance import HammingDistance # noqa: F401 +from paddlemetrics.classification.hinge import Hinge # noqa: F401 +from paddlemetrics.classification.iou import IoU # noqa: F401 +from paddlemetrics.classification.kl_divergence import KLDivergence # noqa: F401 +from paddlemetrics.classification.matthews_corrcoef import MatthewsCorrcoef # noqa: F401 +from paddlemetrics.classification.precision_recall import Precision, Recall # noqa: F401 +from paddlemetrics.classification.precision_recall_curve import PrecisionRecallCurve # noqa: F401 +from paddlemetrics.classification.roc import ROC # noqa: F401 +from paddlemetrics.classification.specificity import Specificity # noqa: F401 +from paddlemetrics.classification.stat_scores import StatScores # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/classification/accuracy.py b/RE/paddlemetric/src/paddlemetrics/classification/accuracy.py new file mode 100644 index 00000000..325a18d4 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/accuracy.py @@ -0,0 +1,276 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.classification.accuracy import ( + _accuracy_compute, + _accuracy_update, + _check_subset_validity, + _mode, + _subset_accuracy_compute, + _subset_accuracy_update, +) +from paddlemetrics.utilities.enums import DataType + +from paddlemetrics.classification.stat_scores import StatScores # isort:skip + + +class Accuracy(StatScores): + r""" + Computes Accuracy_: + + .. math:: + \text{Accuracy} = \frac{1}{N}\sum_i^N 1(y_i = \hat{y}_i) + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a + tensor of predictions. 
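Applied by hand, the formula above is simply the fraction of exactly matching predictions, e.g. with the same values as the docstring example further below:

```python
# Direct application of the accuracy formula above (values from the docstring example).
import numpy as np

target = np.array([0, 1, 2, 3])
preds = np.array([0, 2, 1, 3])
print((preds == target).mean())  # 0.5
```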
+ + For multi-class and multi-dimensional multi-class data with probability or logits predictions, the + parameter ``top_k`` generalizes this metric to a Top-K accuracy metric: for each sample the + top-K highest probability or logit score items are considered to find the correct label. + + For multi-label and multi-dimensional multi-class inputs, this metric computes the "global" + accuracy by default, which counts all labels or sub-samples separately. This can be + changed to subset accuracy (which requires all labels or sub-samples in the sample to + be correctly predicted) by setting ``subset_accuracy=True``. + + Accepts all input types listed in :ref:`references/modules:input types`. + + Args: + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + .. note:: If ``'none'`` and a given class doesn't occur in the `preds` or `target`, + the value for the class will be ``nan``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. 
+ + Should be left at default (``None``) for all other types of inputs. + + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + subset_accuracy: + Whether to compute subset accuracy for multi-label and multi-dimensional + multi-class inputs (has no effect for other input types). + + - For multi-label inputs, if the parameter is set to ``True``, then all labels for + each sample must be correctly predicted for the sample to count as correct. If it + is set to ``False``, then all labels are counted separately - this is equivalent to + flattening inputs beforehand (i.e. ``preds = preds.flatten()`` and same for ``target``). + + - For multi-dimensional multi-class inputs, if the parameter is set to ``True``, then all + sub-sample (on the extra axis) must be correct for the sample to be counted as correct. + If it is set to ``False``, then all sub-samples are counter separately - this is equivalent, + in the case of label predictions, to flattening the inputs beforehand (i.e. + ``preds = preds.flatten()`` and same for ``target``). Note that the ``top_k`` parameter + still applies in both cases, if set. + + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather + + Raises: + ValueError: + If ``top_k`` is not an ``integer`` larger than ``0``. + ValueError: + If ``average`` is none of ``"micro"``, ``"macro"``, ``"weighted"``, ``"samples"``, ``"none"``, ``None``. + ValueError: + If two different input modes are provided, eg. using ``multi-label`` with ``multi-class``. + ValueError: + If ``top_k`` parameter is set for ``multi-label`` inputs. 
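A short sketch contrasting the default "global" accuracy with ``subset_accuracy=True`` on multi-label input, as described above (labels are made up):

```python
# Multi-label example: 3 of 4 individual labels are correct, but only 1 of 2
# samples has all of its labels correct (hypothetical labels).
import paddleext.torchapi as B
from paddlemetrics import Accuracy

target = B.tensor([[1, 1], [0, 1]])
preds = B.tensor([[1, 0], [0, 1]])

print(Accuracy()(preds, target))                      # tensor(0.7500)
print(Accuracy(subset_accuracy=True)(preds, target))  # tensor(0.5000)
```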
+ + Example: + >>> import paddleext.torchapi as B + >>> from paddlemetrics import Accuracy + >>> target = B.tensor([0, 1, 2, 3]) + >>> preds = B.tensor([0, 2, 1, 3]) + >>> accuracy = Accuracy() + >>> accuracy(preds, target) + tensor(0.5000) + + >>> target = B.tensor([0, 1, 2]) + >>> preds = B.tensor([[0.1, 0.9, 0], [0.3, 0.1, 0.6], [0.2, 0.5, 0.3]]) + >>> accuracy = Accuracy(top_k=2) + >>> accuracy(preds, target) + tensor(0.6667) + + """ + is_differentiable = False + correct: Tensor + total: Tensor + + def __init__( + self, + threshold: float = 0.5, + num_classes: Optional[int] = None, + average: str = "micro", + mdmc_average: Optional[str] = "global", + ignore_index: Optional[int] = None, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, + subset_accuracy: bool = False, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + allowed_average = ["micro", "macro", "weighted", "samples", "none", None] + if average not in allowed_average: + raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") + + super().__init__( + reduce="macro" if average in ["weighted", "none", None] else average, + mdmc_reduce=mdmc_average, + threshold=threshold, + top_k=top_k, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.add_state("correct", default=tensor(0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + if top_k is not None and (not isinstance(top_k, int) or top_k <= 0): + raise ValueError(f"The `top_k` should be an integer larger than 0, got {top_k}") + + self.average = average + self.threshold = threshold + self.top_k = top_k + self.subset_accuracy = subset_accuracy + self.mode: DataType = None # type: ignore + self.multiclass = multiclass + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. See + :ref:`references/modules:input types` for more information on input + types. 
+ + Args: + preds: Predictions from model (logits, probabilities, or labels) + target: Ground truth labels + """ + """ returns the mode of the data (binary, multi label, multi class, multi-dim multi class) """ + mode = _mode(preds, target, self.threshold, self.top_k, self.num_classes, self.multiclass) + + if not self.mode: + self.mode = mode + elif self.mode != mode: + raise ValueError(f"You can not use {mode} inputs with {self.mode} inputs.") + + if self.subset_accuracy and not _check_subset_validity(self.mode): + self.subset_accuracy = False + + if self.subset_accuracy: + correct, total = _subset_accuracy_update(preds, target, threshold=self.threshold, top_k=self.top_k) + self.correct += correct + self.total += total + else: + if not self.mode: + raise RuntimeError("You have to have determined mode.") + tp, fp, tn, fn = _accuracy_update( + preds, + target, + reduce=self.reduce, + mdmc_reduce=self.mdmc_reduce, + threshold=self.threshold, + num_classes=self.num_classes, + top_k=self.top_k, + multiclass=self.multiclass, + ignore_index=self.ignore_index, + mode=self.mode, + ) + + # Update states + if self.reduce != "samples" and self.mdmc_reduce != "samplewise": + self.tp += tp + self.fp += fp + self.tn += tn + self.fn += fn + else: + self.tp.append(tp) + self.fp.append(fp) + self.tn.append(tn) + self.fn.append(fn) + + def compute(self) -> Tensor: + """Computes accuracy based on inputs passed in to ``update`` previously.""" + if not self.mode: + raise RuntimeError("You have to have determined mode.") + if self.subset_accuracy: + return _subset_accuracy_compute(self.correct, self.total) + tp, fp, tn, fn = self._get_final_stats() + return _accuracy_compute(tp, fp, tn, fn, self.average, self.mdmc_reduce, self.mode) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/auc.py b/RE/paddlemetric/src/paddlemetrics/classification/auc.py new file mode 100644 index 00000000..99b64048 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/auc.py @@ -0,0 +1,91 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, List, Optional + +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.auc import _auc_compute, _auc_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn +from paddlemetrics.utilities.data import dim_zero_cat + + +class AUC(Metric): + r""" + Computes Area Under the Curve (AUC) using the trapezoidal rule + + Forward accepts two input tensors that should be 1D and have the same number + of elements + + Args: + reorder: AUC expects its first input to be sorted. If this is not the case, + setting this argument to ``True`` will use a stable sorting algorithm to + sort the input in descending order + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. 
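A worked sketch of the trapezoidal rule mentioned above (points are made up; the remaining constructor arguments continue below):

```python
# Trapezoidal area under (x, y): 0.5*(0+1)*0.5 + 0.5*(1+1)*0.5 = 0.75
import paddleext.torchapi as B
from paddlemetrics import AUC

x = B.tensor([0.0, 0.5, 1.0])  # already sorted, so reorder is not needed
y = B.tensor([0.0, 1.0, 1.0])

auc = AUC()
print(auc(x, y))  # tensor(0.7500)
```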
+ process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the ``allgather`` operation on the metric state. When ``None``, DDP + will be used to perform the ``allgather``. + """ + is_differentiable = False + x: List[Tensor] + y: List[Tensor] + + def __init__( + self, + reorder: bool = False, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.reorder = reorder + + self.add_state("x", default=[], dist_reduce_fx="cat") + self.add_state("y", default=[], dist_reduce_fx="cat") + + rank_zero_warn( + "Metric `AUC` will save all targets and predictions in buffer." + " For large datasets this may lead to large memory footprint." + ) + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model (probabilities, or labels) + target: Ground truth labels + """ + x, y = _auc_update(preds, target) + + self.x.append(x) + self.y.append(y) + + def compute(self) -> Tensor: + """Computes AUC based on inputs passed in to ``update`` previously.""" + x = dim_zero_cat(self.x) + y = dim_zero_cat(self.y) + return _auc_compute(x, y, reorder=self.reorder) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/auroc.py b/RE/paddlemetric/src/paddlemetrics/classification/auroc.py new file mode 100644 index 00000000..6236391d --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/auroc.py @@ -0,0 +1,186 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, List, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.auroc import _auroc_compute, _auroc_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn +from paddlemetrics.utilities.data import dim_zero_cat +from paddlemetrics.utilities.enums import DataType +from paddlemetrics.utilities.imports import _TORCH_LOWER_1_6 + + +class AUROC(Metric): + r"""Compute Area Under the Receiver Operating Characteristic Curve (`ROC AUC`_). + Works for both binary, multilabel and multiclass problems. In the case of + multiclass, the values will be calculated based on a one-vs-the-rest approach. + + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor + with probabilities, where C is the number of classes. 
+ + - ``target`` (long tensor): ``(N, ...)`` or ``(N, C, ...)`` with integer labels + + For non-binary input, if the ``preds`` and ``target`` tensor have the same + size the input will be interpretated as multilabel and if ``preds`` have one + dimension more than the ``target`` tensor the input will be interpretated as + multiclass. + + Args: + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + average: + - ``'micro'`` computes metric globally. Only works for multilabel problems + - ``'macro'`` computes metric for each class and uniformly averages them + - ``'weighted'`` computes metric for each class and does a weighted-average, + where each class is weighted by their support (accounts for class imbalance) + - ``None`` computes and returns the metric per class + max_fpr: + If not ``None``, calculates standardized partial AUC over the + range [0, max_fpr]. Should be a float between 0 and 1. + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather + + Raises: + ValueError: + If ``average`` is none of ``None``, ``"macro"`` or ``"weighted"``. + ValueError: + If ``max_fpr`` is not a ``float`` in the range ``(0, 1]``. + RuntimeError: + If ``PyTorch version`` is ``below 1.6`` since max_fpr requires ``B.bucketize`` + which is not available below 1.6. + ValueError: + If the mode of data (binary, multi-label, multi-class) changes between batches. + + Example (binary case): + >>> from paddlemetrics import AUROC + >>> preds = B.tensor([0.13, 0.26, 0.08, 0.19, 0.34]) + >>> target = B.tensor([0, 0, 1, 1, 1]) + >>> auroc = AUROC(pos_label=1) + >>> auroc(preds, target) + tensor(0.5000) + + Example (multiclass case): + >>> preds = B.tensor([[0.90, 0.05, 0.05], + ... [0.05, 0.90, 0.05], + ... [0.05, 0.05, 0.90], + ... [0.85, 0.05, 0.10], + ... 
[0.10, 0.10, 0.80]]) + >>> target = B.tensor([0, 1, 1, 2, 2]) + >>> auroc = AUROC(num_classes=3) + >>> auroc(preds, target) + tensor(0.7778) + + """ + is_differentiable = False + preds: List[Tensor] + target: List[Tensor] + + def __init__( + self, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + average: Optional[str] = "macro", + max_fpr: Optional[float] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.num_classes = num_classes + self.pos_label = pos_label + self.average = average + self.max_fpr = max_fpr + + allowed_average = (None, "macro", "weighted", "micro") + if self.average not in allowed_average: + raise ValueError( + f"Argument `average` expected to be one of the following: {allowed_average} but got {average}" + ) + + if self.max_fpr is not None: + if not isinstance(max_fpr, float) or not 0 < max_fpr <= 1: + raise ValueError(f"`max_fpr` should be a float in range (0, 1], got: {max_fpr}") + + if _TORCH_LOWER_1_6: + raise RuntimeError( + "`max_fpr` argument requires `B.bucketize` which is not available below PyTorch version 1.6" + ) + + self.mode: DataType = None # type: ignore + self.add_state("preds", default=[], dist_reduce_fx="cat") + self.add_state("target", default=[], dist_reduce_fx="cat") + + rank_zero_warn( + "Metric `AUROC` will save all targets and predictions in buffer." + " For large datasets this may lead to large memory footprint." + ) + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model (probabilities, or labels) + target: Ground truth labels + """ + preds, target, mode = _auroc_update(preds, target) + + self.preds.append(preds) + self.target.append(target) + + if self.mode and self.mode != mode: + raise ValueError( + "The mode of data (binary, multi-label, multi-class) should be constant, but changed" + f" between batches from {self.mode} to {mode}" + ) + self.mode = mode + + def compute(self) -> Tensor: + """Computes AUROC based on inputs passed in to ``update`` previously.""" + if not self.mode: + raise RuntimeError("You have to have determined mode.") + preds = dim_zero_cat(self.preds) + target = dim_zero_cat(self.target) + return _auroc_compute( + preds, + target, + self.mode, + self.num_classes, + self.pos_label, + self.average, + self.max_fpr, + ) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/average_precision.py b/RE/paddlemetric/src/paddlemetrics/classification/average_precision.py new file mode 100644 index 00000000..0e37da58 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/average_precision.py @@ -0,0 +1,147 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
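
A minimal usage sketch for the AUC/AUROC modules above, illustrative only: it assumes `paddlemetrics` and the `paddleext.torchapi` backend are importable, and shows the update/compute accumulation pattern these metrics share (state is buffered per batch and concatenated once at the end).

```python
# Illustrative sketch only (assumes paddlemetrics and paddleext.torchapi are importable).
import paddleext.torchapi as B
from paddlemetrics import AUROC

auroc = AUROC(pos_label=1)                      # binary case, as in the docstring example
batches = [
    (B.tensor([0.13, 0.26, 0.08]), B.tensor([0, 0, 1])),
    (B.tensor([0.19, 0.34]), B.tensor([1, 1])),
]
for preds, target in batches:
    auroc.update(preds, target)                 # appends to the preds/target buffers
score = auroc.compute()                         # concatenates buffers and computes ROC AUC
```
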
+from typing import Any, List, Optional, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.average_precision import ( + _average_precision_compute, + _average_precision_update, +) +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn +from paddlemetrics.utilities.data import dim_zero_cat + + +class AveragePrecision(Metric): + """Computes the average precision score, which summarises the precision recall curve into one number. Works for + both binary and multiclass problems. In the case of multiclass, the values will be calculated based on a one- + vs-the-rest approach. + + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor + with probabilities, where C is the number of classes. + + - ``target`` (long tensor): ``(N, ...)`` with integer labels + + Args: + num_classes: integer with number of classes. Not nessesary to provide + for binary problems. + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + average: + defines the reduction that is applied in the case of multiclass and multilabel input. + Should be one of the following: + + - ``'macro'`` [default]: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'micro'``: Calculate the metric globally, across all samples and classes. Cannot be + used with multiclass input. + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support. + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Example (binary case): + >>> from paddlemetrics import AveragePrecision + >>> pred = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 1]) + >>> average_precision = AveragePrecision(pos_label=1) + >>> average_precision(pred, target) + tensor(1.) + + Example (multiclass case): + >>> pred = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... 
[0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> average_precision = AveragePrecision(num_classes=5, average=None) + >>> average_precision(pred, target) + [tensor(1.), tensor(1.), tensor(0.2500), tensor(0.2500), tensor(nan)] + """ + + is_differentiable = False + preds: List[Tensor] + target: List[Tensor] + + def __init__( + self, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + average: Optional[str] = "macro", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + + self.num_classes = num_classes + self.pos_label = pos_label + allowed_average = ("micro", "macro", "weighted", None) + if average not in allowed_average: + raise ValueError(f"Expected argument `average` to be one of {allowed_average}" f" but got {average}") + self.average = average + + self.add_state("preds", default=[], dist_reduce_fx="cat") + self.add_state("target", default=[], dist_reduce_fx="cat") + + rank_zero_warn( + "Metric `AveragePrecision` will save all targets and predictions in buffer." + " For large datasets this may lead to large memory footprint." + ) + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + preds, target, num_classes, pos_label = _average_precision_update( + preds, target, self.num_classes, self.pos_label, self.average + ) + self.preds.append(preds) + self.target.append(target) + self.num_classes = num_classes + self.pos_label = pos_label + + def compute(self) -> Union[Tensor, List[Tensor]]: + """Compute the average precision score. + + Returns: + tensor with average precision. If multiclass will return list + of such tensors, one for each class + """ + preds = dim_zero_cat(self.preds) + target = dim_zero_cat(self.target) + if not self.num_classes: + raise ValueError(f"`num_classes` bas to be positive number, but got {self.num_classes}") + return _average_precision_compute(preds, target, self.num_classes, self.pos_label, self.average) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/binned_precision_recall.py b/RE/paddlemetric/src/paddlemetrics/classification/binned_precision_recall.py new file mode 100644 index 00000000..ffc86ae6 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/binned_precision_recall.py @@ -0,0 +1,324 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
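
As a quick sketch of the `AveragePrecision` module just defined, mirroring its multiclass docstring example (assumes the paddle backend is configured; values and shapes are taken from that example):

```python
# Illustrative sketch only: per-class average precision via forward().
import paddleext.torchapi as B
from paddlemetrics import AveragePrecision

ap = AveragePrecision(num_classes=5, average=None)   # average=None returns one score per class
preds = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05],
                  [0.05, 0.75, 0.05, 0.05, 0.05],
                  [0.05, 0.05, 0.75, 0.05, 0.05],
                  [0.05, 0.05, 0.05, 0.75, 0.05]])
target = B.tensor([0, 1, 3, 2])
per_class = ap(preds, target)   # forward() calls update() and, with compute_on_step=True, compute()
```
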
+from typing import Any, List, Optional, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.average_precision import _average_precision_compute_with_precision_recall +from paddlemetrics.metric import Metric +from paddlemetrics.utilities.data import METRIC_EPS, to_onehot + + +def _recall_at_precision( + precision: Tensor, + recall: Tensor, + thresholds: Tensor, + min_precision: float, +) -> Tuple[Tensor, Tensor]: + try: + max_recall, _, best_threshold = max( + (r, p, t) for p, r, t in zip(precision, recall, thresholds) if p >= min_precision + ) + + except ValueError: + max_recall = B.tensor(0.0, device=recall.device, dtype=recall.dtype) + best_threshold = B.tensor(0) + + if max_recall == 0.0: + best_threshold = B.tensor(1e6, device=thresholds.device, dtype=thresholds.dtype) + + return max_recall, best_threshold + + +class BinnedPrecisionRecallCurve(Metric): + """Computes precision-recall pairs for different thresholds. Works for both binary and multiclass problems. In + the case of multiclass, the values will be calculated based on a one-vs-the-rest approach. + + Computation is performed in constant-memory by computing precision and recall + for ``thresholds`` buckets/thresholds (evenly distributed between 0 and 1). + + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor + with probabilities, where C is the number of classes. + + - ``target`` (long tensor): ``(N, ...)`` or ``(N, C, ...)`` with integer labels + + Args: + num_classes: integer with number of classes. For binary, set to 1. + thresholds: list or tensor with specific thresholds or a number of bins from linear sampling. + It is used for computation will lead to more detailed curve and accurate estimates, + but will be slower and consume more memory. + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Raises: + ValueError: + If ``thresholds`` is not a int, list or tensor + + Example (binary case): + >>> from paddlemetrics import BinnedPrecisionRecallCurve + >>> pred = B.tensor([0, 0.1, 0.8, 0.4]) + >>> target = B.tensor([0, 1, 1, 0]) + >>> pr_curve = BinnedPrecisionRecallCurve(num_classes=1, thresholds=5) + >>> precision, recall, thresholds = pr_curve(pred, target) + >>> precision + tensor([0.5000, 0.5000, 1.0000, 1.0000, 1.0000, 1.0000]) + >>> recall + tensor([1.0000, 0.5000, 0.5000, 0.5000, 0.0000, 0.0000]) + >>> thresholds + tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000]) + + Example (multiclass case): + >>> pred = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... 
[0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> pr_curve = BinnedPrecisionRecallCurve(num_classes=5, thresholds=3) + >>> precision, recall, thresholds = pr_curve(pred, target) + >>> precision # doctest: +NORMALIZE_WHITESPACE + [tensor([0.2500, 1.0000, 1.0000, 1.0000]), + tensor([0.2500, 1.0000, 1.0000, 1.0000]), + tensor([2.5000e-01, 1.0000e-06, 1.0000e+00, 1.0000e+00]), + tensor([2.5000e-01, 1.0000e-06, 1.0000e+00, 1.0000e+00]), + tensor([2.5000e-07, 1.0000e+00, 1.0000e+00, 1.0000e+00])] + >>> recall # doctest: +NORMALIZE_WHITESPACE + [tensor([1.0000, 1.0000, 0.0000, 0.0000]), + tensor([1.0000, 1.0000, 0.0000, 0.0000]), + tensor([1.0000, 0.0000, 0.0000, 0.0000]), + tensor([1.0000, 0.0000, 0.0000, 0.0000]), + tensor([0., 0., 0., 0.])] + >>> thresholds # doctest: +NORMALIZE_WHITESPACE + [tensor([0.0000, 0.5000, 1.0000]), + tensor([0.0000, 0.5000, 1.0000]), + tensor([0.0000, 0.5000, 1.0000]), + tensor([0.0000, 0.5000, 1.0000]), + tensor([0.0000, 0.5000, 1.0000])] + """ + + TPs: Tensor + FPs: Tensor + FNs: Tensor + + def __init__( + self, + num_classes: int, + thresholds: Union[int, Tensor, List[float], None] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + + self.num_classes = num_classes + if isinstance(thresholds, int): + self.num_thresholds = thresholds + thresholds = B.linspace(0, 1.0, thresholds) + self.register_buffer("thresholds", thresholds) + elif thresholds is not None: + if not isinstance(thresholds, (list, Tensor)): + raise ValueError("Expected argument `thresholds` to either be an integer, list of floats or a tensor") + thresholds = B.tensor(thresholds) if isinstance(thresholds, list) else thresholds + self.num_thresholds = thresholds.numel() + self.register_buffer("thresholds", thresholds) + + for name in ("TPs", "FPs", "FNs"): + self.add_state( + name=name, + default=B.zeros(num_classes, self.num_thresholds, dtype=B.float32), + dist_reduce_fx="sum", + ) + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """ + Args + preds: (n_samples, n_classes) tensor + target: (n_samples, n_classes) tensor + """ + # binary case + if len(preds.shape) == len(target.shape) == 1: + preds = preds.reshape(-1, 1) + target = target.reshape(-1, 1) + + if len(preds.shape) == len(target.shape) + 1: + target = to_onehot(target, num_classes=self.num_classes) + + target = target == 1 + # Iterate one threshold at a time to conserve memory + for i in range(self.num_thresholds): + predictions = preds >= self.thresholds[i] + self.TPs[:, i] += (target & predictions).sum(dim=0) + self.FPs[:, i] += ((~target) & (predictions)).sum(dim=0) + self.FNs[:, i] += ((target) & (~predictions)).sum(dim=0) + + def compute(self) -> Union[Tuple[Tensor, Tensor, Tensor], Tuple[List[Tensor], List[Tensor], List[Tensor]]]: + """Returns float tensor of size n_classes.""" + precisions = (self.TPs + METRIC_EPS) / (self.TPs + self.FPs + METRIC_EPS) + recalls = self.TPs / (self.TPs + self.FNs + METRIC_EPS) + + # Need to guarantee that last precision=1 and recall=0, similar to precision_recall_curve + t_ones = B.ones(self.num_classes, 1, dtype=precisions.dtype, device=precisions.device) + precisions = B.cat([precisions, t_ones], dim=1) + t_zeros = B.zeros(self.num_classes, 1, dtype=recalls.dtype, device=recalls.device) + recalls = B.cat([recalls, t_zeros], dim=1) + if 
self.num_classes == 1: + return precisions[0, :], recalls[0, :], self.thresholds + return list(precisions), list(recalls), [self.thresholds for _ in range(self.num_classes)] + + +class BinnedAveragePrecision(BinnedPrecisionRecallCurve): + """Computes the average precision score, which summarises the precision recall curve into one number. Works for + both binary and multiclass problems. In the case of multiclass, the values will be calculated based on a one- + vs-the-rest approach. + + Computation is performed in constant-memory by computing precision and recall + for ``thresholds`` buckets/thresholds (evenly distributed between 0 and 1). + + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor + with probabilities, where C is the number of classes. + + - ``target`` (long tensor): ``(N, ...)`` with integer labels + + Args: + num_classes: integer with number of classes. Not nessesary to provide + for binary problems. + thresholds: list or tensor with specific thresholds or a number of bins from linear sampling. + It is used for computation will lead to more detailed curve and accurate estimates, + but will be slower and consume more memory + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Raises: + ValueError: + If ``thresholds`` is not a list or tensor + + Example (binary case): + >>> from paddlemetrics import BinnedAveragePrecision + >>> pred = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 1]) + >>> average_precision = BinnedAveragePrecision(num_classes=1, thresholds=10) + >>> average_precision(pred, target) + tensor(1.0000) + + Example (multiclass case): + >>> pred = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> average_precision = BinnedAveragePrecision(num_classes=5, thresholds=10) + >>> average_precision(pred, target) + [tensor(1.0000), tensor(1.0000), tensor(0.2500), tensor(0.2500), tensor(-0.)] + """ + + def compute(self) -> Union[List[Tensor], Tensor]: # type: ignore + precisions, recalls, _ = super().compute() + return _average_precision_compute_with_precision_recall(precisions, recalls, self.num_classes, average=None) + + +class BinnedRecallAtFixedPrecision(BinnedPrecisionRecallCurve): + """Computes the higest possible recall value given the minimum precision thresholds provided. + + Computation is performed in constant-memory by computing precision and recall + for ``thresholds`` buckets/thresholds (evenly distributed between 0 and 1). + + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor + with probabilities, where C is the number of classes. + + - ``target`` (long tensor): ``(N, ...)`` with integer labels + + Args: + num_classes: integer with number of classes. Provide 1 for for binary problems. + min_precision: float value specifying minimum precision threshold. + thresholds: list or tensor with specific thresholds or a number of bins from linear sampling. + It is used for computation will lead to more detailed curve and accurate estimates, + but will be slower and consume more memory + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. 
default: True + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Raises: + ValueError: + If ``thresholds`` is not a list or tensor + + Example (binary case): + >>> from paddlemetrics import BinnedRecallAtFixedPrecision + >>> pred = B.tensor([0, 0.2, 0.5, 0.8]) + >>> target = B.tensor([0, 1, 1, 0]) + >>> average_precision = BinnedRecallAtFixedPrecision(num_classes=1, thresholds=10, min_precision=0.5) + >>> average_precision(pred, target) + (tensor(1.0000), tensor(0.1111)) + + Example (multiclass case): + >>> pred = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> average_precision = BinnedRecallAtFixedPrecision(num_classes=5, thresholds=10, min_precision=0.5) + >>> average_precision(pred, target) # doctest: +NORMALIZE_WHITESPACE + (tensor([1.0000, 1.0000, 0.0000, 0.0000, 0.0000]), + tensor([6.6667e-01, 6.6667e-01, 1.0000e+06, 1.0000e+06, 1.0000e+06])) + """ + + def __init__( + self, + num_classes: int, + min_precision: float, + thresholds: Union[int, Tensor, List[float], None] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + num_classes=num_classes, + thresholds=thresholds, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + self.min_precision = min_precision + + def compute(self) -> Tuple[Tensor, Tensor]: # type: ignore + """Returns float tensor of size n_classes.""" + precisions, recalls, thresholds = super().compute() + + if self.num_classes == 1: + return _recall_at_precision(precisions, recalls, thresholds, self.min_precision) + + recalls_at_p = B.zeros(self.num_classes, device=recalls[0].device, dtype=recalls[0].dtype) + thresholds_at_p = B.zeros(self.num_classes, device=thresholds[0].device, dtype=thresholds[0].dtype) + for i in range(self.num_classes): + recalls_at_p[i], thresholds_at_p[i] = _recall_at_precision( + precisions[i], recalls[i], thresholds[i], self.min_precision + ) + return recalls_at_p, thresholds_at_p diff --git a/RE/paddlemetric/src/paddlemetrics/classification/calibration_error.py b/RE/paddlemetric/src/paddlemetrics/classification/calibration_error.py new file mode 100644 index 00000000..5fc9d10a --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/calibration_error.py @@ -0,0 +1,115 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
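
A short, illustrative sketch of the binned (constant-memory) family defined above; it assumes the same import setup as the docstring examples. The key design point is that only `num_classes x num_thresholds` TP/FP/FN counters are kept, so memory does not grow with the number of samples passed to ``update()``, and `BinnedAveragePrecision` / `BinnedRecallAtFixedPrecision` reuse those same counters through subclassing.

```python
# Illustrative sketch only (assumes paddlemetrics and paddleext.torchapi are importable).
import paddleext.torchapi as B
from paddlemetrics import BinnedPrecisionRecallCurve, BinnedRecallAtFixedPrecision

pr_curve = BinnedPrecisionRecallCurve(num_classes=1, thresholds=5)   # 5 evenly spaced bins in [0, 1]
precision, recall, thresholds = pr_curve(B.tensor([0.0, 0.1, 0.8, 0.4]),
                                         B.tensor([0, 1, 1, 0]))

recall_at_p = BinnedRecallAtFixedPrecision(num_classes=1, thresholds=10, min_precision=0.5)
best_recall, best_threshold = recall_at_p(B.tensor([0.0, 0.2, 0.5, 0.8]),
                                          B.tensor([0, 1, 1, 0]))
```
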
+from typing import Any, List, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.calibration_error import _ce_compute, _ce_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities.data import dim_zero_cat + + +class CalibrationError(Metric): + r""" + + `Computes the Top-label Calibration Error`_ + Three different norms are implemented, each corresponding to variations on the calibration error metric. + + L1 norm (Expected Calibration Error) + + .. math:: + \text{ECE} = \frac{1}{N}\sum_i^N \|(p_i - c_i)\| + + Infinity norm (Maximum Calibration Error) + + .. math:: + \text{RMSCE} = \max_{i} (p_i - c_i) + + L2 norm (Root Mean Square Calibration Error) + + .. math:: + \text{MCE} = \frac{1}{N}\sum_i^N (p_i - c_i)^2 + + Where :math:`p_i` is the top-1 prediction accuracy in bin i + and :math:`c_i` is the average confidence of predictions in bin i. + + .. note:: + L2-norm debiasing is not yet supported. + + Args: + n_bins: Number of bins to use when computing probabilites and accuracies. + norm: Norm used to compare empirical and expected probability bins. + Defaults to "l1", or Expected Calibration Error. + debias: Applies debiasing term, only implemented for l2 norm. Defaults to True. + compute_on_step: Forward only calls ``update()`` and return None if this is set to False. + dist_sync_on_step: Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: Specify the process group on which synchronization is called. + default: None (which selects the entire world) + """ + DISTANCES = {"l1", "l2", "max"} + confidences: List[Tensor] + accuracies: List[Tensor] + + def __init__( + self, + n_bins: int = 15, + norm: str = "l1", + compute_on_step: bool = False, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ): + + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=None, + ) + + if norm not in self.DISTANCES: + raise ValueError(f"Norm {norm} is not supported. Please select from l1, l2, or max. ") + + if not isinstance(n_bins, int) or n_bins <= 0: + raise ValueError(f"Expected argument `n_bins` to be a int larger than 0 but got {n_bins}") + self.n_bins = n_bins + self.register_buffer("bin_boundaries", B.linspace(0, 1, n_bins + 1)) + self.norm = norm + + self.add_state("confidences", [], dist_reduce_fx="cat") + self.add_state("accuracies", [], dist_reduce_fx="cat") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Computes top-level confidences and accuracies for the input probabilites and appends them to internal + state. + + Args: + preds (Tensor): Model output probabilities. + target (Tensor): Ground-truth target class labels. + """ + confidences, accuracies = _ce_update(preds, target) + + self.confidences.append(confidences) + self.accuracies.append(accuracies) + + def compute(self) -> Tensor: + """Computes calibration error across all confidences and accuracies. + + Returns: + Tensor: Calibration error across previously collected examples. 
+ """ + confidences = dim_zero_cat(self.confidences) + accuracies = dim_zero_cat(self.accuracies) + return _ce_compute(confidences, accuracies, self.bin_boundaries, norm=self.norm) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/cohen_kappa.py b/RE/paddlemetric/src/paddlemetrics/classification/cohen_kappa.py new file mode 100644 index 00000000..3a4817cf --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/cohen_kappa.py @@ -0,0 +1,119 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.cohen_kappa import _cohen_kappa_compute, _cohen_kappa_update +from paddlemetrics.metric import Metric + + +class CohenKappa(Metric): + r""" + Calculates `Cohen's kappa score`_ that measures + inter-annotator agreement. It is defined as + + .. math:: + \kappa = (p_o - p_e) / (1 - p_e) + + where :math:`p_o` is the empirical probability of agreement and :math:`p_e` is + the expected agreement when both annotators assign labels randomly. Note that + :math:`p_e` is estimated using a per-annotator empirical prior over the + class labels. + + Works with binary, multiclass, and multilabel data. Accepts probabilities from a model output or + integer class values in prediction. Works with multi-dimensional preds and target. + + Forward accepts + - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes + + - ``target`` (long tensor): ``(N, ...)`` + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument + to convert into integer labels. This is the case for binary and multi-label probabilities or logits. + + If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. + + Args: + num_classes: Number of classes in the dataset. + + weights: Weighting type to calculate the score. Choose from + - ``None`` or ``'none'``: no weighting + - ``'linear'``: linear weighting + - ``'quadratic'``: quadratic weighting + + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + + process_group: + Specify the process group on which synchronization is called. 
default: None (which selects the entire world) + + Example: + >>> from paddlemetrics import CohenKappa + >>> target = B.tensor([1, 1, 0, 0]) + >>> preds = B.tensor([0, 1, 0, 0]) + >>> cohenkappa = CohenKappa(num_classes=2) + >>> cohenkappa(preds, target) + tensor(0.5000) + + """ + is_differentiable = False + confmat: Tensor + + def __init__( + self, + num_classes: int, + weights: Optional[str] = None, + threshold: float = 0.5, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + self.num_classes = num_classes + self.weights = weights + self.threshold = threshold + + allowed_weights = ("linear", "quadratic", "none", None) + if self.weights not in allowed_weights: + raise ValueError(f"Argument weights needs to one of the following: {allowed_weights}") + + self.add_state("confmat", default=B.zeros(num_classes, num_classes), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + confmat = _cohen_kappa_update(preds, target, self.num_classes, self.threshold) + self.confmat += confmat + + def compute(self) -> Tensor: + """Computes cohen kappa score.""" + return _cohen_kappa_compute(self.confmat, self.weights) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/confusion_matrix.py b/RE/paddlemetric/src/paddlemetrics/classification/confusion_matrix.py new file mode 100644 index 00000000..a3485570 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/confusion_matrix.py @@ -0,0 +1,141 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.confusion_matrix import _confusion_matrix_compute, _confusion_matrix_update +from paddlemetrics.metric import Metric + + +class ConfusionMatrix(Metric): + r""" + Computes the `confusion matrix`_. Works with binary, + multiclass, and multilabel data. Accepts probabilities or logits from a model output or integer class + values in prediction. Works with multi-dimensional preds and target, but it should be noted that + additional dimensions will be flattened. + + Forward accepts + + - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes + - ``target`` (long tensor): ``(N, ...)`` + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument + to convert into integer labels. This is the case for binary and multi-label probabilities or logits. + + If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. 
+ + If working with multilabel data, setting the `is_multilabel` argument to `True` will make sure that a + `confusion matrix gets calculated per label`_. + + Args: + num_classes: Number of classes in the dataset. + normalize: Normalization mode for confusion matrix. Choose from + + - ``None`` or ``'none'``: no normalization (default) + - ``'true'``: normalization over the targets (most commonly used) + - ``'pred'``: normalization over the predictions + - ``'all'``: normalization over the whole matrix + + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + + multilabel: + determines if data is multilabel or not. + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Example (binary data): + >>> from paddlemetrics import ConfusionMatrix + >>> target = B.tensor([1, 1, 0, 0]) + >>> preds = B.tensor([0, 1, 0, 0]) + >>> confmat = ConfusionMatrix(num_classes=2) + >>> confmat(preds, target) + tensor([[2., 0.], + [1., 1.]]) + + Example (multiclass data): + >>> target = B.tensor([2, 1, 0, 0]) + >>> preds = B.tensor([2, 1, 0, 1]) + >>> confmat = ConfusionMatrix(num_classes=3) + >>> confmat(preds, target) + tensor([[1., 1., 0.], + [0., 1., 0.], + [0., 0., 1.]]) + + Example (multilabel data): + >>> target = B.tensor([[0, 1, 0], [1, 0, 1]]) + >>> preds = B.tensor([[0, 0, 1], [1, 0, 1]]) + >>> confmat = ConfusionMatrix(num_classes=3, multilabel=True) + >>> confmat(preds, target) # doctest: +NORMALIZE_WHITESPACE + tensor([[[1., 0.], [0., 1.]], + [[1., 0.], [1., 0.]], + [[0., 1.], [0., 1.]]]) + + """ + is_differentiable = False + confmat: Tensor + + def __init__( + self, + num_classes: int, + normalize: Optional[str] = None, + threshold: float = 0.5, + multilabel: bool = False, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + self.num_classes = num_classes + self.normalize = normalize + self.threshold = threshold + self.multilabel = multilabel + + allowed_normalize = ("true", "pred", "all", "none", None) + if self.normalize not in allowed_normalize: + raise ValueError(f"Argument average needs to one of the following: {allowed_normalize}") + + default = B.zeros(num_classes, 2, 2) if multilabel else B.zeros(num_classes, num_classes) + self.add_state("confmat", default=default, dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + confmat = _confusion_matrix_update(preds, target, self.num_classes, self.threshold, self.multilabel) + self.confmat += confmat + + def compute(self) -> Tensor: + """Computes confusion matrix. 
+ + Returns: + If `multilabel=False` this will be a `[n_classes, n_classes]` tensor and if `multilabel=True` + this will be a `[n_classes, 2, 2]` tensor + """ + return _confusion_matrix_compute(self.confmat, self.normalize) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/f_beta.py b/RE/paddlemetric/src/paddlemetrics/classification/f_beta.py new file mode 100644 index 00000000..4b24dc0e --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/f_beta.py @@ -0,0 +1,301 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.classification.stat_scores import StatScores +from paddlemetrics.functional.classification.f_beta import _fbeta_compute +from paddlemetrics.utilities.enums import AverageMethod + + +class FBeta(StatScores): + r""" + Computes `F-score`_, specifically: + + .. math:: + F_\beta = (1 + \beta^2) * \frac{\text{precision} * \text{recall}} + {(\beta^2 * \text{precision}) + \text{recall}} + + Where :math:`\beta` is some positive real factor. Works with binary, multiclass, and multilabel data. + Accepts logit scores or probabilities from a model output or integer class values in prediction. + Works with multi-dimensional preds and target. + + Forward accepts + + - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes + - ``target`` (long tensor): ``(N, ...)`` + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument + to convert into integer labels. This is the case for binary and multi-label logits and probabilities. + + If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. + + Args: + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + beta: + Beta coefficient in the F measure. + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. 
note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + .. note:: If ``'none'`` and a given class doesn't occur in the `preds` or `target`, + the value for the class will be ``nan``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather. + + Raises: + ValueError: + If ``average`` is none of ``"micro"``, ``"macro"``, ``"weighted"``, ``"none"``, ``None``. 
+ + Example: + >>> from paddlemetrics import FBeta + >>> target = B.tensor([0, 1, 2, 0, 1, 2]) + >>> preds = B.tensor([0, 2, 1, 0, 0, 1]) + >>> f_beta = FBeta(num_classes=3, beta=0.5) + >>> f_beta(preds, target) + tensor(0.3333) + + """ + + def __init__( + self, + num_classes: Optional[int] = None, + beta: float = 1.0, + threshold: float = 0.5, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + self.beta = beta + allowed_average = list(AverageMethod) + if average not in allowed_average: + raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") + + super().__init__( + reduce="macro" if average in [AverageMethod.WEIGHTED, AverageMethod.NONE] else average, + mdmc_reduce=mdmc_average, + threshold=threshold, + top_k=top_k, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.average = average + + def compute(self) -> Tensor: + """Computes fbeta over state.""" + tp, fp, tn, fn = self._get_final_stats() + return _fbeta_compute(tp, fp, tn, fn, self.beta, self.ignore_index, self.average, self.mdmc_reduce) + + +class F1(FBeta): + """Computes F1 metric. F1 metrics correspond to a harmonic mean of the precision and recall scores. + + Works with binary, multiclass, and multilabel data. Accepts logits or probabilities from a model + output or integer class values in prediction. Works with multi-dimensional preds and target. + + Forward accepts + + - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes + - ``target`` (long tensor): ``(N, ...)`` + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument. + This is the case for binary and multi-label logits. + + If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. + + Args: + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. 
+ + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather. 
+ + + Example: + >>> from paddlemetrics import F1 + >>> target = B.tensor([0, 1, 2, 0, 1, 2]) + >>> preds = B.tensor([0, 2, 1, 0, 0, 1]) + >>> f1 = F1(num_classes=3) + >>> f1(preds, target) + tensor(0.3333) + """ + + is_differentiable = False + + def __init__( + self, + num_classes: Optional[int] = None, + threshold: float = 0.5, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + num_classes=num_classes, + beta=1.0, + threshold=threshold, + average=average, + mdmc_average=mdmc_average, + ignore_index=ignore_index, + top_k=top_k, + multiclass=multiclass, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/hamming_distance.py b/RE/paddlemetric/src/paddlemetrics/classification/hamming_distance.py new file mode 100644 index 00000000..855d7f7e --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/hamming_distance.py @@ -0,0 +1,110 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.classification.hamming_distance import _hamming_distance_compute, _hamming_distance_update +from paddlemetrics.metric import Metric + + +class HammingDistance(Metric): + r""" + Computes the average `Hamming distance`_ (also + known as Hamming loss) between targets and predictions: + + .. math:: + \text{Hamming distance} = \frac{1}{N \cdot L}\sum_i^N \sum_l^L 1(y_{il} \neq \hat{y_{il}}) + + Where :math:`y` is a tensor of target values, :math:`\hat{y}` is a tensor of predictions, + and :math:`\bullet_{il}` refers to the :math:`l`-th label of the :math:`i`-th sample of that + tensor. + + This is the same as ``1-accuracy`` for binary data, while for all other types of inputs it + treats each possible label separately - meaning that, for example, multi-class data is + treated as if it were multi-label. + + Accepts all input types listed in :ref:`references/modules:input types`. + + Args: + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. 
+ default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the all gather. + + Raises: + ValueError: + If ``threshold`` is not between ``0`` and ``1``. + + Example: + >>> from paddlemetrics import HammingDistance + >>> target = B.tensor([[0, 1], [1, 1]]) + >>> preds = B.tensor([[0, 1], [0, 1]]) + >>> hamming_distance = HammingDistance() + >>> hamming_distance(preds, target) + tensor(0.2500) + + """ + is_differentiable = False + correct: Tensor + total: Tensor + + def __init__( + self, + threshold: float = 0.5, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.add_state("correct", default=tensor(0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + self.threshold = threshold + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. See + :ref:`references/modules:input types` for more information on input + types. + + Args: + preds: Predictions from model (probabilities, logits or labels) + target: Ground truth labels + """ + correct, total = _hamming_distance_update(preds, target, self.threshold) + + self.correct += correct + self.total += total + + def compute(self) -> Tensor: + """Computes hamming distance based on inputs passed in to ``update`` previously.""" + return _hamming_distance_compute(self.correct, self.total) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/hinge.py b/RE/paddlemetric/src/paddlemetrics/classification/hinge.py new file mode 100644 index 00000000..099864a3 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/hinge.py @@ -0,0 +1,127 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional, Union + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.classification.hinge import MulticlassMode, _hinge_compute, _hinge_update +from paddlemetrics.metric import Metric + + +class Hinge(Metric): + r""" + Computes the mean `Hinge loss`_, typically used for Support Vector + Machines (SVMs). In the binary case it is defined as: + + .. math:: + \text{Hinge loss} = \max(0, 1 - y \times \hat{y}) + + Where :math:`y \in {-1, 1}` is the target, and :math:`\hat{y} \in \mathbb{R}` is the prediction. + + In the multi-class case, when ``multiclass_mode=None`` (default), ``multiclass_mode=MulticlassMode.CRAMMER_SINGER`` + or ``multiclass_mode="crammer-singer"``, this metric will compute the multi-class hinge loss defined by Crammer and + Singer as: + + .. 
math:: + \text{Hinge loss} = \max\left(0, 1 - \hat{y}_y + \max_{i \ne y} (\hat{y}_i)\right) + + Where :math:`y \in {0, ..., \mathrm{C}}` is the target class (where :math:`\mathrm{C}` is the number of classes), + and :math:`\hat{y} \in \mathbb{R}^\mathrm{C}` is the predicted output per class. + + In the multi-class case when ``multiclass_mode=MulticlassMode.ONE_VS_ALL`` or ``multiclass_mode='one-vs-all'``, this + metric will use a one-vs-all approach to compute the hinge loss, giving a vector of C outputs where each entry pits + that class against all remaining classes. + + This metric can optionally output the mean of the squared hinge loss by setting ``squared=True`` + + Only accepts inputs with preds shape of (N) (binary) or (N, C) (multi-class) and target shape of (N). + + Args: + squared: + If True, this will compute the squared hinge loss. Otherwise, computes the regular hinge loss (default). + multiclass_mode: + Which approach to use for multi-class inputs (has no effect in the binary case). ``None`` (default), + ``MulticlassMode.CRAMMER_SINGER`` or ``"crammer-singer"``, uses the Crammer Singer multi-class hinge loss. + ``MulticlassMode.ONE_VS_ALL`` or ``"one-vs-all"`` computes the hinge loss in a one-vs-all fashion. + + Raises: + ValueError: + If ``multiclass_mode`` is not: None, ``MulticlassMode.CRAMMER_SINGER``, ``"crammer-singer"``, + ``MulticlassMode.ONE_VS_ALL`` or ``"one-vs-all"``. + + Example (binary case): + >>> import paddleext.torchapi as B + >>> from paddlemetrics import Hinge + >>> target = B.tensor([0, 1, 1]) + >>> preds = B.tensor([-2.2, 2.4, 0.1]) + >>> hinge = Hinge() + >>> hinge(preds, target) + tensor(0.3000) + + Example (default / multiclass case): + >>> target = B.tensor([0, 1, 2]) + >>> preds = B.tensor([[-1.0, 0.9, 0.2], [0.5, -1.1, 0.8], [2.2, -0.5, 0.3]]) + >>> hinge = Hinge() + >>> hinge(preds, target) + tensor(2.9000) + + Example (multiclass example, one vs all mode): + >>> target = B.tensor([0, 1, 2]) + >>> preds = B.tensor([[-1.0, 0.9, 0.2], [0.5, -1.1, 0.8], [2.2, -0.5, 0.3]]) + >>> hinge = Hinge(multiclass_mode="one-vs-all") + >>> hinge(preds, target) + tensor([2.2333, 1.5000, 1.2333]) + + """ + is_differentiable = True + measure: Tensor + total: Tensor + + def __init__( + self, + squared: bool = False, + multiclass_mode: Optional[Union[str, MulticlassMode]] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.add_state("measure", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + if multiclass_mode not in (None, MulticlassMode.CRAMMER_SINGER, MulticlassMode.ONE_VS_ALL): + raise ValueError( + "The `multiclass_mode` should be either None / 'crammer-singer' / MulticlassMode.CRAMMER_SINGER" + " (default) or 'one-vs-all' / MulticlassMode.ONE_VS_ALL," + f" got {multiclass_mode}."
+ ) + + self.squared = squared + self.multiclass_mode = multiclass_mode + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + measure, total = _hinge_update(preds, target, squared=self.squared, multiclass_mode=self.multiclass_mode) + + self.measure = measure + self.measure + self.total = total + self.total + + def compute(self) -> Tensor: + return _hinge_compute(self.measure, self.total) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/iou.py b/RE/paddlemetric/src/paddlemetrics/classification/iou.py new file mode 100644 index 00000000..9e89946a --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/iou.py @@ -0,0 +1,107 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.classification.confusion_matrix import ConfusionMatrix +from paddlemetrics.functional.classification.iou import _iou_from_confmat + + +class IoU(ConfusionMatrix): + r""" + Computes Intersection over union, or `Jaccard index`_: + + .. math:: J(A,B) = \frac{|A\cap B|}{|A\cup B|} + + Where: :math:`A` and :math:`B` are both tensors of the same size, containing integer class values. + They may be subject to conversion from input data (see description below). Note that it is different from box IoU. + + Works with binary, multiclass and multi-label data. + Accepts probabilities from a model output or integer class values in prediction. + Works with multi-dimensional preds and target. + + Forward accepts + + - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes + - ``target`` (long tensor): ``(N, ...)`` + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument + to convert into integer labels. This is the case for binary and multi-label probabilities. + + If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. + + Args: + num_classes: Number of classes in the dataset. + ignore_index: optional int specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. Has no effect if given an int that is not in the + range [0, num_classes-1]. By default, no index is ignored, and all classes are used. + absent_score: score to use for an individual class, if no instances of the class index were present in + `pred` AND no instances of the class index were present in `target`. For example, if we have 3 classes, + [0, 0] for `pred`, and [0, 2] for `target`, then class 1 would be assigned the `absent_score`. + threshold: + Threshold value for binary or multi-label probabilities. + reduction: a method to reduce metric score over labels. 
+ + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Example: + >>> from paddlemetrics import IoU + >>> target = B.randint(0, 2, (10, 25, 25)) + >>> pred = B.tensor(target) + >>> pred[2:5, 7:13, 9:15] = 1 - pred[2:5, 7:13, 9:15] + >>> iou = IoU(num_classes=2) + >>> iou(pred, target) + tensor(0.9660) + + """ + is_differentiable = False + + def __init__( + self, + num_classes: int, + ignore_index: Optional[int] = None, + absent_score: float = 0.0, + threshold: float = 0.5, + reduction: str = "elementwise_mean", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + num_classes=num_classes, + normalize=None, + threshold=threshold, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + self.reduction = reduction + self.ignore_index = ignore_index + self.absent_score = absent_score + + def compute(self) -> Tensor: + """Computes intersection over union (IoU)""" + return _iou_from_confmat(self.confmat, self.num_classes, self.ignore_index, self.absent_score, self.reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/kl_divergence.py b/RE/paddlemetric/src/paddlemetrics/classification/kl_divergence.py new file mode 100644 index 00000000..cce887f0 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/kl_divergence.py @@ -0,0 +1,109 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.kl_divergence import _kld_compute, _kld_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities.data import dim_zero_cat + + +class KLDivergence(Metric): + r"""Computes the `KL divergence`_: + + .. math:: + D_{KL}(P||Q) = \sum_{x\in\mathcal{X}} P(x) \log\frac{P(x)}{Q(x)} + + Where :math:`P` and :math:`Q` are probability distributions where :math:`P` usually represents a distribution + over data and :math:`Q` is often a prior or approximation of :math:`P`. It should be noted that the KL divergence + is a non-symmetric metric i.e. :math:`D_{KL}(P||Q) \neq D_{KL}(Q||P)`. + + Args: + p: data distribution with shape ``[N, d]`` + q: prior or approximate distribution with shape ``[N, d]`` + log_prob: bool indicating if input is log-probabilities or probabilities.
If given as probabilities, + will normalize to make sure the distributions sum to 1 + reduction: + Determines how to reduce over the ``N``/batch dimension: + + - ``'mean'`` [default]: Averages score across samples + - ``'sum'``: Sum score across samples + - ``'none'`` or ``None``: Returns score per sample + + Raises: + TypeError: + If ``log_prob`` is not a ``bool`` + ValueError: + If ``reduction`` is not one of ``'mean'``, ``'sum'``, ``'none'`` or ``None`` + + .. note:: + Half precision is only supported on GPU for this metric + + Example: + >>> import paddleext.torchapi as B + >>> from paddlemetrics.functional import kl_divergence + >>> p = B.tensor([[0.36, 0.48, 0.16]]) + >>> q = B.tensor([[1/3, 1/3, 1/3]]) + >>> kl_divergence(p, q) + tensor(0.0853) + + """ + is_differentiable = True + # TODO: cannot be used because of scripting + # measures: Union[List[Tensor], Tensor] + total: Tensor + + def __init__( + self, + log_prob: bool = False, + reduction: Optional[str] = "mean", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + if not isinstance(log_prob, bool): + raise TypeError(f"Expected argument `log_prob` to be bool but got {log_prob}") + self.log_prob = log_prob + + allowed_reduction = ["mean", "sum", "none", None] + if reduction not in allowed_reduction: + raise ValueError(f"Expected argument `reduction` to be one of {allowed_reduction} but got {reduction}") + self.reduction = reduction + + if self.reduction in ["mean", "sum"]: + self.add_state("measures", B.zeros(1), dist_reduce_fx="sum") + else: + self.add_state("measures", [], dist_reduce_fx="cat") + self.add_state("total", B.zeros(1), dist_reduce_fx="sum") + + def update(self, p: Tensor, q: Tensor) -> None: # type: ignore + measures, total = _kld_update(p, q, self.log_prob) + if self.reduction is None or self.reduction == "none": + self.measures.append(measures) + else: + self.measures += measures.sum() + self.total += total + + def compute(self) -> Tensor: + measures = dim_zero_cat(self.measures) if self.reduction is None or self.reduction == "none" else self.measures + return _kld_compute(measures, self.total, self.reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/matthews_corrcoef.py b/RE/paddlemetric/src/paddlemetrics/classification/matthews_corrcoef.py new file mode 100644 index 00000000..2ea52673 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/matthews_corrcoef.py @@ -0,0 +1,111 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
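+# A quick illustrative hand-check of the MCC formula implemented in this module, using the same +# inputs as the ``MatthewsCorrcoef`` docstring example below; the counts TP=1, FP=0, TN=2, FN=1 +# are assumed from comparing preds=[0, 1, 0, 0] against target=[1, 1, 0, 0] with class 1 as positive: +# +# import math +# tp, fp, tn, fn = 1, 0, 2, 1 +# mcc = (tp * tn - fp * fn) / math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) +# round(mcc, 4) # 0.5774, matching tensor(0.5774) in the example below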
+from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.matthews_corrcoef import ( + _matthews_corrcoef_compute, + _matthews_corrcoef_update, +) +from paddlemetrics.metric import Metric + + +class MatthewsCorrcoef(Metric): + r""" + Calculates `Matthews correlation coefficient`_ that measures + the general correlation or quality of a classification. In the binary case it + is defined as: + + .. math:: + MCC = \frac{TP*TN - FP*FN}{\sqrt{(TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)}} + + where TP, TN, FP and FN are respectively the true positives, true negatives, + false positives and false negatives. Also works in the case of multi-label or + multi-class input. + + Forward accepts + + - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes + - ``target`` (long tensor): ``(N, ...)`` + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument + to convert into integer labels. This is the case for binary and multi-label probabilities. + + If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. + + Args: + num_classes: Number of classes in the dataset. + threshold: + Threshold value for binary or multi-label probabilities. default: 0.5 + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather + + Example: + >>> from paddlemetrics import MatthewsCorrcoef + >>> target = B.tensor([1, 1, 0, 0]) + >>> preds = B.tensor([0, 1, 0, 0]) + >>> matthews_corrcoef = MatthewsCorrcoef(num_classes=2) + >>> matthews_corrcoef(preds, target) + tensor(0.5774) + + """ + is_differentiable = False + confmat: Tensor + + def __init__( + self, + num_classes: int, + threshold: float = 0.5, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + self.num_classes = num_classes + self.threshold = threshold + + self.add_state("confmat", default=B.zeros(num_classes, num_classes), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets.
+ + Args: + preds: Predictions from model + target: Ground truth values + """ + confmat = _matthews_corrcoef_update(preds, target, self.num_classes, self.threshold) + self.confmat += confmat + + def compute(self) -> Tensor: + """Computes matthews correlation coefficient.""" + return _matthews_corrcoef_compute(self.confmat) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/precision_recall.py b/RE/paddlemetric/src/paddlemetrics/classification/precision_recall.py new file mode 100644 index 00000000..77920cfc --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/precision_recall.py @@ -0,0 +1,320 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.classification.stat_scores import StatScores +from paddlemetrics.functional.classification.precision_recall import _precision_compute, _recall_compute + + +class Precision(StatScores): + r""" + Computes `Precision`_: + + .. math:: \text{Precision} = \frac{\text{TP}}{\text{TP} + \text{FP}} + + Where :math:`\text{TP}` and :math:`\text{FP}` represent the number of true positives and + false positives respecitively. With the use of ``top_k`` parameter, this metric can + generalize to Precision@K. + + The reduction method (how the precision scores are aggregated) is controlled by the + ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the + multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). 
Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather. + + Raises: + ValueError: + If ``average`` is none of ``"micro"``, ``"macro"``, ``"weighted"``, ``"samples"``, ``"none"``, ``None``. 
+ + Example: + >>> from paddlemetrics import Precision + >>> preds = B.tensor([2, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> precision = Precision(average='macro', num_classes=3) + >>> precision(preds, target) + tensor(0.1667) + >>> precision = Precision(average='micro') + >>> precision(preds, target) + tensor(0.2500) + + """ + is_differentiable = False + + def __init__( + self, + num_classes: Optional[int] = None, + threshold: float = 0.5, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + allowed_average = ["micro", "macro", "weighted", "samples", "none", None] + if average not in allowed_average: + raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") + + super().__init__( + reduce="macro" if average in ["weighted", "none", None] else average, + mdmc_reduce=mdmc_average, + threshold=threshold, + top_k=top_k, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.average = average + + def compute(self) -> Tensor: + """Computes the precision score based on inputs passed in to ``update`` previously. + + Return: + The shape of the returned tensor depends on the ``average`` parameter + + - If ``average in ['micro', 'macro', 'weighted', 'samples']``, a one-element tensor will be returned + - If ``average in ['none', None]``, the shape will be ``(C,)``, where ``C`` stands for the number + of classes + """ + tp, fp, _, fn = self._get_final_stats() + return _precision_compute(tp, fp, fn, self.average, self.mdmc_reduce) + + +class Recall(StatScores): + r""" + Computes `Recall`_: + + .. math:: \text{Recall} = \frac{\text{TP}}{\text{TP} + \text{FN}} + + Where :math:`\text{TP}` and :math:`\text{FN}` represent the number of true positives and + false negatives respecitively. With the use of ``top_k`` parameter, this metric can + generalize to Recall@K. + + The reduction method (how the recall scores are aggregated) is controlled by the + ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the + multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. 
+ - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather. + + Raises: + ValueError: + If ``average`` is none of ``"micro"``, ``"macro"``, ``"weighted"``, ``"samples"``, ``"none"``, ``None``. 
+ + Example: + >>> from paddlemetrics import Recall + >>> preds = B.tensor([2, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> recall = Recall(average='macro', num_classes=3) + >>> recall(preds, target) + tensor(0.3333) + >>> recall = Recall(average='micro') + >>> recall(preds, target) + tensor(0.2500) + + """ + is_differentiable = False + + def __init__( + self, + num_classes: Optional[int] = None, + threshold: float = 0.5, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + allowed_average = ["micro", "macro", "weighted", "samples", "none", None] + if average not in allowed_average: + raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") + + super().__init__( + reduce="macro" if average in ["weighted", "none", None] else average, + mdmc_reduce=mdmc_average, + threshold=threshold, + top_k=top_k, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.average = average + + def compute(self) -> Tensor: + """Computes the recall score based on inputs passed in to ``update`` previously. + + Return: + The shape of the returned tensor depends on the ``average`` parameter + + - If ``average in ['micro', 'macro', 'weighted', 'samples']``, a one-element tensor will be returned + - If ``average in ['none', None]``, the shape will be ``(C,)``, where ``C`` stands for the number + of classes + """ + tp, fp, _, fn = self._get_final_stats() + return _recall_compute(tp, fp, fn, self.average, self.mdmc_reduce) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/precision_recall_curve.py b/RE/paddlemetric/src/paddlemetrics/classification/precision_recall_curve.py new file mode 100644 index 00000000..34141909 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/precision_recall_curve.py @@ -0,0 +1,149 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, List, Optional, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.precision_recall_curve import ( + _precision_recall_curve_compute, + _precision_recall_curve_update, +) +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn +from paddlemetrics.utilities.data import dim_zero_cat + + +class PrecisionRecallCurve(Metric): + """Computes precision-recall pairs for different thresholds. Works for both binary and multiclass problems. In + the case of multiclass, the values will be calculated based on a one-vs-the-rest approach. 
+ + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor + with probabilities, where C is the number of classes. + + - ``target`` (long tensor): ``(N, ...)`` or ``(N, C, ...)`` with integer labels + + Args: + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Example (binary case): + >>> from paddlemetrics import PrecisionRecallCurve + >>> pred = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 0]) + >>> pr_curve = PrecisionRecallCurve(pos_label=1) + >>> precision, recall, thresholds = pr_curve(pred, target) + >>> precision + tensor([0.6667, 0.5000, 0.0000, 1.0000]) + >>> recall + tensor([1.0000, 0.5000, 0.0000, 0.0000]) + >>> thresholds + tensor([1, 2, 3]) + + Example (multiclass case): + >>> pred = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> pr_curve = PrecisionRecallCurve(num_classes=5) + >>> precision, recall, thresholds = pr_curve(pred, target) + >>> precision # doctest: +NORMALIZE_WHITESPACE + [tensor([1., 1.]), tensor([1., 1.]), tensor([0.2500, 0.0000, 1.0000]), + tensor([0.2500, 0.0000, 1.0000]), tensor([0., 1.])] + >>> recall + [tensor([1., 0.]), tensor([1., 0.]), tensor([1., 0., 0.]), tensor([1., 0., 0.]), tensor([nan, 0.])] + >>> thresholds + [tensor([0.7500]), tensor([0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500])] + """ + + is_differentiable = False + preds: List[Tensor] + target: List[Tensor] + + def __init__( + self, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + + self.num_classes = num_classes + self.pos_label = pos_label + + self.add_state("preds", default=[], dist_reduce_fx="cat") + self.add_state("target", default=[], dist_reduce_fx="cat") + + rank_zero_warn( + "Metric `PrecisionRecallCurve` will save all targets and predictions in buffer." + " For large datasets this may lead to large memory footprint." + ) + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. 
+ + Args: + preds: Predictions from model + target: Ground truth values + """ + preds, target, num_classes, pos_label = _precision_recall_curve_update( + preds, target, self.num_classes, self.pos_label + ) + self.preds.append(preds) + self.target.append(target) + self.num_classes = num_classes + self.pos_label = pos_label + + def compute(self) -> Union[Tuple[Tensor, Tensor, Tensor], Tuple[List[Tensor], List[Tensor], List[Tensor]]]: + """Compute the precision-recall curve. + + Returns: + 3-element tuple containing + + precision: + tensor where element i is the precision of predictions with + score >= thresholds[i] and the last element is 1. + If multiclass, this is a list of such tensors, one for each class. + recall: + tensor where element i is the recall of predictions with + score >= thresholds[i] and the last element is 0. + If multiclass, this is a list of such tensors, one for each class. + thresholds: + Thresholds used for computing precision/recall scores + """ + preds = dim_zero_cat(self.preds) + target = dim_zero_cat(self.target) + if not self.num_classes: + raise ValueError(f"`num_classes` has to be a positive number, but got {self.num_classes}") + return _precision_recall_curve_compute(preds, target, self.num_classes, self.pos_label) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/roc.py b/RE/paddlemetric/src/paddlemetrics/classification/roc.py new file mode 100644 index 00000000..a01a5b94 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/roc.py @@ -0,0 +1,169 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, List, Optional, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.roc import _roc_compute, _roc_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn + + +class ROC(Metric): + """Computes the Receiver Operating Characteristic (ROC). Works for both binary, multiclass and multilabel + problems. In the case of multiclass, the values will be calculated based on a one-vs-the-rest approach. + + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass/multilabel) tensor + with probabilities, where C is the number of classes/labels. + + - ``target`` (long tensor): ``(N, ...)`` or ``(N, C, ...)`` with integer labels + + Args: + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problems is translated to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False.
default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather + + Example (binary case): + >>> from paddlemetrics import ROC + >>> pred = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 1]) + >>> roc = ROC(pos_label=1) + >>> fpr, tpr, thresholds = roc(pred, target) + >>> fpr + tensor([0., 0., 0., 0., 1.]) + >>> tpr + tensor([0.0000, 0.3333, 0.6667, 1.0000, 1.0000]) + >>> thresholds + tensor([4, 3, 2, 1, 0]) + + Example (multiclass case): + >>> pred = B.tensor([[0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05], + ... [0.05, 0.05, 0.05, 0.75]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> roc = ROC(num_classes=4) + >>> fpr, tpr, thresholds = roc(pred, target) + >>> fpr + [tensor([0., 0., 1.]), tensor([0., 0., 1.]), tensor([0.0000, 0.3333, 1.0000]), tensor([0.0000, 0.3333, 1.0000])] + >>> tpr + [tensor([0., 1., 1.]), tensor([0., 1., 1.]), tensor([0., 0., 1.]), tensor([0., 0., 1.])] + >>> thresholds # doctest: +NORMALIZE_WHITESPACE + [tensor([1.7500, 0.7500, 0.0500]), + tensor([1.7500, 0.7500, 0.0500]), + tensor([1.7500, 0.7500, 0.0500]), + tensor([1.7500, 0.7500, 0.0500])] + + Example (multilabel case): + >>> pred = B.tensor([[0.8191, 0.3680, 0.1138], + ... [0.3584, 0.7576, 0.1183], + ... [0.2286, 0.3468, 0.1338], + ... [0.8603, 0.0745, 0.1837]]) + >>> target = B.tensor([[1, 1, 0], [0, 1, 0], [0, 0, 0], [0, 1, 1]]) + >>> roc = ROC(num_classes=3, pos_label=1) + >>> fpr, tpr, thresholds = roc(pred, target) + >>> fpr # doctest: +NORMALIZE_WHITESPACE + [tensor([0.0000, 0.3333, 0.3333, 0.6667, 1.0000]), + tensor([0., 0., 0., 1., 1.]), + tensor([0.0000, 0.0000, 0.3333, 0.6667, 1.0000])] + >>> tpr # doctest: +NORMALIZE_WHITESPACE + [tensor([0., 0., 1., 1., 1.]), + tensor([0.0000, 0.3333, 0.6667, 0.6667, 1.0000]), + tensor([0., 1., 1., 1., 1.])] + >>> thresholds # doctest: +NORMALIZE_WHITESPACE + [tensor([1.8603, 0.8603, 0.8191, 0.3584, 0.2286]), + tensor([1.7576, 0.7576, 0.3680, 0.3468, 0.0745]), + tensor([1.1837, 0.1837, 0.1338, 0.1183, 0.1138])] + """ + + is_differentiable = False + preds: List[Tensor] + target: List[Tensor] + + def __init__( + self, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.num_classes = num_classes + self.pos_label = pos_label + + self.add_state("preds", default=[], dist_reduce_fx=None) + self.add_state("target", default=[], dist_reduce_fx=None) + + rank_zero_warn( + "Metric `ROC` will save all targets and predictions in buffer." + " For large datasets this may lead to large memory footprint." + ) + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. 
+ + Args: + preds: Predictions from model + target: Ground truth values + """ + preds, target, num_classes, pos_label = _roc_update(preds, target, self.num_classes, self.pos_label) + self.preds.append(preds) + self.target.append(target) + self.num_classes = num_classes + self.pos_label = pos_label + + def compute(self) -> Union[Tuple[Tensor, Tensor, Tensor], Tuple[List[Tensor], List[Tensor], List[Tensor]]]: + """Compute the receiver operating characteristic. + + Returns: + 3-element tuple containing + + fpr: + tensor with false positive rates. + If multiclass, this is a list of such tensors, one for each class. + tpr: + tensor with true positive rates. + If multiclass, this is a list of such tensors, one for each class. + thresholds: + thresholds used for computing false- and true positive rates + """ + preds = B.cat(self.preds, dim=0) + target = B.cat(self.target, dim=0) + if not self.num_classes: + raise ValueError(f"`num_classes` has to be a positive number, but got {self.num_classes}") + return _roc_compute(preds, target, self.num_classes, self.pos_label) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/specificity.py b/RE/paddlemetric/src/paddlemetrics/classification/specificity.py new file mode 100644 index 00000000..0ad44268 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/specificity.py @@ -0,0 +1,171 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.classification.stat_scores import StatScores +from paddlemetrics.functional.classification.specificity import _specificity_compute + + +class Specificity(StatScores): + r""" + Computes `Specificity`_: + + .. math:: \text{Specificity} = \frac{\text{TN}}{\text{TN} + \text{FP}} + + Where :math:`\text{TN}` and :math:`\text{FP}` represent the number of true negatives and + false positives respectively. With the use of ``top_k`` parameter, this metric can + generalize to Specificity@K. + + The reduction method (how the specificity scores are aggregated) is controlled by the + ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the + multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + threshold: + Threshold probability value for transforming probability predictions to binary + (0,1) predictions, in the case of binary or multi-label inputs. + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class).
+ - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tn + fp``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + top_k: + Number of highest probability entries for each sample to convert to 1s - relevant + only for inputs with probability predictions. If this parameter is set for multi-label + inputs, it will take precedence over ``threshold``. For (multi-dim) multi-class inputs, + this parameter defaults to 1. + + Should be left unset (``None``) for inputs with label predictions. + + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather. + + Raises: + ValueError: + If ``average`` is none of ``"micro"``, ``"macro"``, ``"weighted"``, ``"samples"``, ``"none"``, ``None``. 
+ + Example: + >>> from paddlemetrics import Specificity + >>> preds = B.tensor([2, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> specificity = Specificity(average='macro', num_classes=3) + >>> specificity(preds, target) + tensor(0.6111) + >>> specificity = Specificity(average='micro') + >>> specificity(preds, target) + tensor(0.6250) + + """ + is_differentiable = False + + def __init__( + self, + num_classes: Optional[int] = None, + threshold: float = 0.5, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + allowed_average = ["micro", "macro", "weighted", "samples", "none", None] + if average not in allowed_average: + raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") + + super().__init__( + reduce="macro" if average in ["weighted", "none", None] else average, + mdmc_reduce=mdmc_average, + threshold=threshold, + top_k=top_k, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.average = average + + def compute(self) -> Tensor: + """Computes the specificity score based on inputs passed in to ``update`` previously. + + Return: + The shape of the returned tensor depends on the ``average`` parameter + + - If ``average in ['micro', 'macro', 'weighted', 'samples']``, a one-element tensor will be returned + - If ``average in ['none', None]``, the shape will be ``(C,)``, where ``C`` stands for the number + of classes + """ + tp, fp, tn, fn = self._get_final_stats() + return _specificity_compute(tp, fp, tn, fn, self.average, self.mdmc_reduce) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/stat_scores.py b/RE/paddlemetric/src/paddlemetrics/classification/stat_scores.py new file mode 100644 index 00000000..ec099c86 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/stat_scores.py @@ -0,0 +1,267 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional, Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.stat_scores import _stat_scores_compute, _stat_scores_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities.enums import AverageMethod, MDMCAverageMethod + + +class StatScores(Metric): + r"""Computes the number of true positives, false positives, true negatives, false negatives. + Related to `Type I and Type II errors`_ + and the `confusion matrix`_. 
+ + The reduction method (how the statistics are aggregated) is controlled by the + ``reduce`` parameter, and additionally by the ``mdmc_reduce`` parameter in the + multi-dimensional multi-class case. + + Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + + reduce: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Counts the statistics by summing over all [sample, class] + combinations (globally). Each statistic is represented by a single integer. + - ``'macro'``: Counts the statistics for each class separately (over all samples). + Each statistic is represented by a ``(C,)`` tensor. Requires ``num_classes`` + to be set. + - ``'samples'``: Counts the statistics for each sample separately (over all classes). + Each statistic is represented by a ``(N, )`` 1d tensor. + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_reduce``. + + num_classes: + Number of classes. Necessary for (multi-dimensional) multi-class or multi-label data. + + ignore_index: + Specify a class (label) to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and + ``reduce='macro'``, the class statistics for the ignored class will all be returned + as ``-1``. + + mdmc_reduce: + Defines how the multi-dimensional multi-class inputs are handeled. Should be + one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class (see :ref:`references/modules:input types` for the definition of input types). + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then the outputs are concatenated together. In each + sample the extra axes ``...`` are flattened to become the sub-sample axis, and + statistics for each sample are computed by treating the sub-sample axis as the + ``N`` axis for that sample. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs are + flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``reduce`` parameter applies as usual. + + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. 
When ``None``, DDP + will be used to perform the allgather. + + Raises: + ValueError: + If ``reduce`` is none of ``"micro"``, ``"macro"`` or ``"samples"``. + ValueError: + If ``mdmc_reduce`` is none of ``None``, ``"samplewise"``, ``"global"``. + ValueError: + If ``reduce`` is set to ``"macro"`` and ``num_classes`` is not provided. + ValueError: + If ``num_classes`` is set + and ``ignore_index`` is not in the range ``0`` <= ``ignore_index`` < ``num_classes``. + + Example: + >>> from paddlemetrics.classification import StatScores + >>> preds = B.tensor([1, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> stat_scores = StatScores(reduce='macro', num_classes=3) + >>> stat_scores(preds, target) + tensor([[0, 1, 2, 1, 1], + [1, 1, 1, 1, 2], + [1, 0, 3, 0, 1]]) + >>> stat_scores = StatScores(reduce='micro') + >>> stat_scores(preds, target) + tensor([2, 2, 6, 2, 4]) + + """ + is_differentiable = False + # TODO: canot be used because if scripting + # tp: Union[Tensor, List[Tensor]] + # fp: Union[Tensor, List[Tensor]] + # tn: Union[Tensor, List[Tensor]] + # fn: Union[Tensor, List[Tensor]] + + def __init__( + self, + threshold: float = 0.5, + top_k: Optional[int] = None, + reduce: str = "micro", + num_classes: Optional[int] = None, + ignore_index: Optional[int] = None, + mdmc_reduce: Optional[str] = None, + multiclass: Optional[bool] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.reduce = reduce + self.mdmc_reduce = mdmc_reduce + self.num_classes = num_classes + self.threshold = threshold + self.multiclass = multiclass + self.ignore_index = ignore_index + self.top_k = top_k + + if reduce not in ["micro", "macro", "samples"]: + raise ValueError(f"The `reduce` {reduce} is not valid.") + + if mdmc_reduce not in [None, "samplewise", "global"]: + raise ValueError(f"The `mdmc_reduce` {mdmc_reduce} is not valid.") + + if reduce == "macro" and (not num_classes or num_classes < 1): + raise ValueError("When you set `reduce` as 'macro', you have to provide the number of classes.") + + if num_classes and ignore_index is not None and (not 0 <= ignore_index < num_classes or num_classes == 1): + raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {num_classes} classes") + + default: Callable = lambda: [] + reduce_fn: Optional[str] = None + if mdmc_reduce != "samplewise" and reduce != "samples": + if reduce == "micro": + zeros_shape = [] + elif reduce == "macro": + zeros_shape = [num_classes] + else: + raise ValueError(f'Wrong reduce="{reduce}"') + default = lambda: B.zeros(zeros_shape, dtype=B.long) + reduce_fn = "sum" + + for s in ("tp", "fp", "tn", "fn"): + self.add_state(s, default=default(), dist_reduce_fx=reduce_fn) + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. See + :ref:`references/modules:input types` for more information on input + types. 
+ + Args: + preds: Predictions from model (probabilities, logits or labels) + target: Ground truth values + """ + + tp, fp, tn, fn = _stat_scores_update( + preds, + target, + reduce=self.reduce, + mdmc_reduce=self.mdmc_reduce, + threshold=self.threshold, + num_classes=self.num_classes, + top_k=self.top_k, + multiclass=self.multiclass, + ignore_index=self.ignore_index, + ) + + # Update states + if self.reduce != AverageMethod.SAMPLES and self.mdmc_reduce != MDMCAverageMethod.SAMPLEWISE: + self.tp += tp + self.fp += fp + self.tn += tn + self.fn += fn + else: + self.tp.append(tp) + self.fp.append(fp) + self.tn.append(tn) + self.fn.append(fn) + + def _get_final_stats(self) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + """Performs concatenation on the stat scores if neccesary, before passing them to a compute function.""" + tp = B.cat(self.tp) if isinstance(self.tp, list) else self.tp + fp = B.cat(self.fp) if isinstance(self.fp, list) else self.fp + tn = B.cat(self.tn) if isinstance(self.tn, list) else self.tn + fn = B.cat(self.fn) if isinstance(self.fn, list) else self.fn + return tp, fp, tn, fn + + def compute(self) -> Tensor: + """Computes the stat scores based on inputs passed in to ``update`` previously. + + Return: + The metric returns a tensor of shape ``(..., 5)``, where the last dimension corresponds + to ``[tp, fp, tn, fn, sup]`` (``sup`` stands for support and equals ``tp + fn``). The + shape depends on the ``reduce`` and ``mdmc_reduce`` (in case of multi-dimensional + multi-class data) parameters: + + - If the data is not multi-dimensional multi-class, then + + - If ``reduce='micro'``, the shape will be ``(5, )`` + - If ``reduce='macro'``, the shape will be ``(C, 5)``, + where ``C`` stands for the number of classes + - If ``reduce='samples'``, the shape will be ``(N, 5)``, where ``N`` stands for + the number of samples + + - If the data is multi-dimensional multi-class and ``mdmc_reduce='global'``, then + + - If ``reduce='micro'``, the shape will be ``(5, )`` + - If ``reduce='macro'``, the shape will be ``(C, 5)`` + - If ``reduce='samples'``, the shape will be ``(N*X, 5)``, where ``X`` stands for + the product of sizes of all "extra" dimensions of the data (i.e. all dimensions + except for ``C`` and ``N``) + + - If the data is multi-dimensional multi-class and ``mdmc_reduce='samplewise'``, then + + - If ``reduce='micro'``, the shape will be ``(N, 5)`` + - If ``reduce='macro'``, the shape will be ``(N, C, 5)`` + - If ``reduce='samples'``, the shape will be ``(N, X, 5)`` + """ + tp, fp, tn, fn = self._get_final_stats() + return _stat_scores_compute(tp, fp, tn, fn) diff --git a/RE/paddlemetric/src/paddlemetrics/collections.py b/RE/paddlemetric/src/paddlemetrics/collections.py new file mode 100644 index 00000000..3b03856e --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/collections.py @@ -0,0 +1,239 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
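# --- Illustrative usage sketch (editor's note, not part of this changeset) ---
# The shape rules documented in ``StatScores.compute`` above can be exercised
# directly. This assumes the backend alias ``B`` (paddleext.torchapi) used
# throughout paddlemetrics; the sample values are made up for illustration.
import paddleext.torchapi as B
from paddlemetrics.classification import StatScores

# Multi-dimensional multi-class label inputs: N=2 samples, X=4 sub-samples,
# C=3 classes.
preds = B.tensor([[0, 1, 2, 2], [1, 1, 0, 2]])
target = B.tensor([[0, 2, 2, 1], [1, 0, 0, 2]])

stat_scores = StatScores(reduce="macro", mdmc_reduce="samplewise", num_classes=3)
out = stat_scores(preds, target)
# Per the compute() docstring, reduce='macro' with mdmc_reduce='samplewise'
# yields one (C, 5) block of [tp, fp, tn, fn, support] per sample, i.e. an
# (N, C, 5) tensor overall.
assert tuple(out.shape) == (2, 3, 5)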
+ +from collections import OrderedDict +from copy import deepcopy +from typing import Any, Dict, Hashable, Iterable, Optional, Sequence, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import nn + +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn + + +class MetricCollection(nn.ModuleDict): + """MetricCollection class can be used to chain metrics that have the same call pattern into one single class. + + Args: + metrics: One of the following + + * list or tuple (sequence): if metrics are passed in as a list or tuple, will use the metrics class name + as key for output dict. Therefore, two metrics of the same class cannot be chained this way. + + * arguments: similar to passing in as a list, metrics passed in as arguments will use their metric + class name as key for the output dict. + + * dict: if metrics are passed in as a dict, will use each key in the dict as key for output dict. + Use this format if you want to chain together multiple of the same metric with different parameters. + Note that the keys in the output dict will be sorted alphabetically. + + prefix: a string to append in front of the keys of the output dict + + postfix: a string to append after the keys of the output dict + + Raises: + ValueError: + If one of the elements of ``metrics`` is not an instance of ``pl.metrics.Metric``. + ValueError: + If two elements in ``metrics`` have the same ``name``. + ValueError: + If ``metrics`` is not a ``list``, ``tuple`` or a ``dict``. + ValueError: + If ``metrics`` is ``dict`` and additional_metrics are passed in. + ValueError: + If ``prefix`` is set and it is not a string. + ValueError: + If ``postfix`` is set and it is not a string. + + Example (input as list): + >>> import torchapi as B + >>> from pprint import pprint + >>> from paddlemetrics import MetricCollection, Accuracy, Precision, Recall + >>> target = B.tensor([0, 2, 0, 2, 0, 1, 0, 2]) + >>> preds = B.tensor([2, 1, 2, 0, 1, 2, 2, 2]) + >>> metrics = MetricCollection([Accuracy(), + ... Precision(num_classes=3, average='macro'), + ... Recall(num_classes=3, average='macro')]) + >>> metrics(preds, target) + {'Accuracy': tensor(0.1250), 'Precision': tensor(0.0667), 'Recall': tensor(0.1111)} + + Example (input as arguments): + >>> metrics = MetricCollection(Accuracy(), Precision(num_classes=3, average='macro'), + ... Recall(num_classes=3, average='macro')) + >>> metrics(preds, target) + {'Accuracy': tensor(0.1250), 'Precision': tensor(0.0667), 'Recall': tensor(0.1111)} + + Example (input as dict): + >>> metrics = MetricCollection({'micro_recall': Recall(num_classes=3, average='micro'), + ... 'macro_recall': Recall(num_classes=3, average='macro')}) + >>> same_metric = metrics.clone() + >>> pprint(metrics(preds, target)) + {'macro_recall': tensor(0.1111), 'micro_recall': tensor(0.1250)} + >>> pprint(same_metric(preds, target)) + {'macro_recall': tensor(0.1111), 'micro_recall': tensor(0.1250)} + >>> metrics.persistent() + """ + + def __init__( + self, + metrics: Union[Metric, Sequence[Metric], Dict[str, Metric]], + *additional_metrics: Metric, + prefix: Optional[str] = None, + postfix: Optional[str] = None, + ) -> None: + super().__init__() + + self._modules = self._sub_layers + + self.add_metrics(metrics, *additional_metrics) + + self.prefix = self._check_arg(prefix, "prefix") + self.postfix = self._check_arg(postfix, "postfix") + + def forward(self, *args: Any, **kwargs: Any) -> Dict[str, Any]: + """Iteratively call forward for each metric. 
+ + Positional arguments (args) will be passed to every metric in the collection, while keyword arguments (kwargs) + will be filtered based on the signature of the individual metric. + """ + return {k: m(*args, **m._filter_kwargs(**kwargs)) for k, m in self.items()} + + def update(self, *args: Any, **kwargs: Any) -> None: + """Iteratively call update for each metric. + + Positional arguments (args) will be passed to every metric in the collection, while keyword arguments (kwargs) + will be filtered based on the signature of the individual metric. + """ + for _, m in self.items(keep_base=True): + m_kwargs = m._filter_kwargs(**kwargs) + m.update(*args, **m_kwargs) + + def compute(self) -> Dict[str, Any]: + return {k: m.compute() for k, m in self.items()} + + def reset(self) -> None: + """Iteratively call reset for each metric.""" + for _, m in self.items(keep_base=True): + m.reset() + + def clone(self, prefix: Optional[str] = None, postfix: Optional[str] = None) -> "MetricCollection": + """Make a copy of the metric collection + Args: + prefix: a string to append in front of the metric keys + postfix: a string to append after the keys of the output dict + + """ + mc = deepcopy(self) + if prefix: + mc.prefix = self._check_arg(prefix, "prefix") + if postfix: + mc.postfix = self._check_arg(postfix, "postfix") + return mc + + def persistent(self, mode: bool = True) -> None: + """Method for post-init to change if metric states should be saved to its state_dict.""" + for _, m in self.items(keep_base=True): + m.persistent(mode) + + def add_metrics( + self, metrics: Union[Metric, Sequence[Metric], Dict[str, Metric]], *additional_metrics: Metric + ) -> None: + """Add new metrics to Metric Collection.""" + if isinstance(metrics, Metric): + # set compatible with original type expectations + metrics = [metrics] + if isinstance(metrics, Sequence): + # prepare for optional additions + metrics = list(metrics) + remain: list = [] + for m in additional_metrics: + (metrics if isinstance(m, Metric) else remain).append(m) + + if remain: + rank_zero_warn( + f"You have passes extra arguments {remain} which are not `Metric` so they will be ignored." + ) + elif additional_metrics: + raise ValueError( + f"You have passes extra arguments {additional_metrics} which are not compatible" + f" with first passed dictionary {metrics} so they will be ignored." 
+ ) + + if isinstance(metrics, dict): + # Check all values are metrics + # Make sure that metrics are added in deterministic order + for name in sorted(metrics.keys()): + metric = metrics[name] + if not isinstance(metric, Metric): + raise ValueError( + f"Value {metric} belonging to key {name} is not an instance of `pl.metrics.Metric`" + ) + self[name] = metric + elif isinstance(metrics, Sequence): + for metric in metrics: + if not isinstance(metric, Metric): + raise ValueError(f"Input {metric} to `MetricCollection` is not a instance of `pl.metrics.Metric`") + name = metric.__class__.__name__ + if name in self: + raise ValueError(f"Encountered two metrics both named {name}") + self[name] = metric + else: + raise ValueError("Unknown input to MetricCollection.") + + def _set_name(self, base: str) -> str: + name = base if self.prefix is None else self.prefix + base + name = name if self.postfix is None else name + self.postfix + return name + + def _to_renamed_ordered_dict(self) -> OrderedDict: + od = OrderedDict() + for k, v in self._modules.items(): + od[self._set_name(k)] = v + return od + + def keys(self, keep_base: bool = False) -> Iterable[Hashable]: + r"""Return an iterable of the ModuleDict key. + Args: + keep_base: Whether to add prefix/postfix on the items collection. + """ + if keep_base: + return self._modules.keys() + return self._to_renamed_ordered_dict().keys() + + def items(self, keep_base: bool = False) -> Iterable[Tuple[str, nn.Module]]: + r"""Return an iterable of the ModuleDict key/value pairs. + Args: + keep_base: Whether to add prefix/postfix on the items collection. + """ + if keep_base: + return self._modules.items() + return self._to_renamed_ordered_dict().items() + + @staticmethod + def _check_arg(arg: Optional[str], name: str) -> Optional[str]: + if arg is None or isinstance(arg, str): + return arg + raise ValueError(f"Expected input `{name}` to be a string, but got {type(arg)}") + + def __repr__(self) -> str: + repr_str = super().__repr__()[:-2] + if self.prefix: + repr_str += f",\n prefix={self.prefix}{',' if self.postfix else ''}" + if self.postfix: + repr_str += f"{',' if not self.prefix else ''}\n postfix={self.postfix}" + return repr_str + "\n)" + + def to(self, device): + pass \ No newline at end of file diff --git a/RE/paddlemetric/src/paddlemetrics/functional/__init__.py b/RE/paddlemetric/src/paddlemetrics/functional/__init__.py new file mode 100644 index 00000000..365d93c9 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/__init__.py @@ -0,0 +1,138 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
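# --- Illustrative usage sketch (editor's note, not part of this changeset) ---
# How the prefix/postfix renaming and ``clone`` of ``MetricCollection`` above
# interact. Assumes the backend alias ``B`` used throughout the package; the
# inputs are toy label tensors.
import paddleext.torchapi as B
from paddlemetrics import Accuracy, MetricCollection, Recall

metrics = MetricCollection(
    {"acc": Accuracy(), "rec": Recall(num_classes=3, average="macro")},
    prefix="train_",
)
preds = B.tensor([0, 2, 1, 0])
target = B.tensor([0, 1, 2, 0])
out = metrics(preds, target)                # keys come back renamed: 'train_acc', 'train_rec'
val_metrics = metrics.clone(prefix="val_")  # independent copy, re-prefixed to 'val_...'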
+from paddlemetrics.functional.audio.pesq import pesq +from paddlemetrics.functional.audio.pit import pit, pit_permutate +from paddlemetrics.functional.audio.si_sdr import si_sdr +from paddlemetrics.functional.audio.si_snr import si_snr +from paddlemetrics.functional.audio.snr import snr +from paddlemetrics.functional.audio.stoi import stoi +from paddlemetrics.functional.classification.accuracy import accuracy +from paddlemetrics.functional.classification.auc import auc +from paddlemetrics.functional.classification.auroc import auroc +from paddlemetrics.functional.classification.average_precision import average_precision +from paddlemetrics.functional.classification.calibration_error import calibration_error +from paddlemetrics.functional.classification.cohen_kappa import cohen_kappa +from paddlemetrics.functional.classification.confusion_matrix import confusion_matrix +from paddlemetrics.functional.classification.dice import dice_score +from paddlemetrics.functional.classification.f_beta import f1, fbeta +from paddlemetrics.functional.classification.hamming_distance import hamming_distance +from paddlemetrics.functional.classification.hinge import hinge +from paddlemetrics.functional.classification.iou import iou +from paddlemetrics.functional.classification.kl_divergence import kl_divergence +from paddlemetrics.functional.classification.matthews_corrcoef import matthews_corrcoef +from paddlemetrics.functional.classification.precision_recall import precision, precision_recall, recall +from paddlemetrics.functional.classification.precision_recall_curve import precision_recall_curve +from paddlemetrics.functional.classification.roc import roc +from paddlemetrics.functional.classification.specificity import specificity +from paddlemetrics.functional.classification.stat_scores import stat_scores +from paddlemetrics.functional.image.gradients import image_gradients +from paddlemetrics.functional.image.psnr import psnr +from paddlemetrics.functional.image.ssim import ssim +from paddlemetrics.functional.pairwise.cosine import pairwise_cosine_similarity +from paddlemetrics.functional.pairwise.euclidean import pairwise_euclidean_distance +from paddlemetrics.functional.pairwise.linear import pairwise_linear_similarity +from paddlemetrics.functional.pairwise.manhatten import pairwise_manhatten_distance +from paddlemetrics.functional.regression.cosine_similarity import cosine_similarity +from paddlemetrics.functional.regression.explained_variance import explained_variance +from paddlemetrics.functional.regression.mean_absolute_error import mean_absolute_error +from paddlemetrics.functional.regression.mean_absolute_percentage_error import mean_absolute_percentage_error +from paddlemetrics.functional.regression.mean_squared_error import mean_squared_error +from paddlemetrics.functional.regression.mean_squared_log_error import mean_squared_log_error +from paddlemetrics.functional.regression.pearson import pearson_corrcoef +from paddlemetrics.functional.regression.r2 import r2_score +from paddlemetrics.functional.regression.spearman import spearman_corrcoef +from paddlemetrics.functional.regression.symmetric_mean_absolute_percentage_error import ( + symmetric_mean_absolute_percentage_error, +) +from paddlemetrics.functional.regression.tweedie_deviance import tweedie_deviance_score +from paddlemetrics.functional.retrieval.average_precision import retrieval_average_precision +from paddlemetrics.functional.retrieval.fall_out import retrieval_fall_out +from paddlemetrics.functional.retrieval.hit_rate import 
retrieval_hit_rate +from paddlemetrics.functional.retrieval.ndcg import retrieval_normalized_dcg +from paddlemetrics.functional.retrieval.precision import retrieval_precision +from paddlemetrics.functional.retrieval.r_precision import retrieval_r_precision +from paddlemetrics.functional.retrieval.recall import retrieval_recall +from paddlemetrics.functional.retrieval.reciprocal_rank import retrieval_reciprocal_rank +from paddlemetrics.functional.self_supervised import embedding_similarity +#from paddlemetrics.functional.text.bert import bert_score +from paddlemetrics.functional.text.bleu import bleu_score +from paddlemetrics.functional.text.rouge import rouge_score +from paddlemetrics.functional.text.sacre_bleu import sacre_bleu_score +from paddlemetrics.functional.text.wer import wer + +__all__ = [ + "accuracy", + "auc", + "auroc", + "average_precision", +# "bert_score", + "bleu_score", + "calibration_error", + "cohen_kappa", + "confusion_matrix", + "cosine_similarity", + "tweedie_deviance_score", + "dice_score", + "embedding_similarity", + "explained_variance", + "f1", + "fbeta", + "hamming_distance", + "hinge", + "image_gradients", + "iou", + "kl_divergence", + "kldivergence", + "matthews_corrcoef", + "mean_absolute_error", + "mean_absolute_percentage_error", + "mean_squared_error", + "mean_squared_log_error", + "pairwise_cosine_similarity", + "pairwise_euclidean_distance", + "pairwise_linear_similarity", + "pairwise_manhatten_distance", + "pearson_corrcoef", + "pesq", + "pit", + "pit_permutate", + "precision", + "precision_recall", + "precision_recall_curve", + "psnr", + "r2_score", + "r2score", + "recall", + "retrieval_average_precision", + "retrieval_fall_out", + "retrieval_hit_rate", + "retrieval_normalized_dcg", + "retrieval_precision", + "retrieval_r_precision", + "retrieval_recall", + "retrieval_reciprocal_rank", + "roc", + "rouge_score", + "sacre_bleu_score", + "si_sdr", + "si_snr", + "snr", + "spearman_corrcoef", + "specificity", + "ssim", + "stat_scores", + "stoi", + "symmetric_mean_absolute_percentage_error", + "wer", +] diff --git a/RE/paddlemetric/src/paddlemetrics/functional/audio/__init__.py b/RE/paddlemetric/src/paddlemetrics/functional/audio/__init__.py new file mode 100644 index 00000000..a7e7d89c --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/audio/__init__.py @@ -0,0 +1,19 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
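# --- Illustrative usage sketch (editor's note, not part of this changeset) ---
# The functional entry points re-exported above are stateless: each call takes
# predictions and targets and returns the metric value directly. Assumes the
# backend alias ``B`` used throughout the package.
import paddleext.torchapi as B
from paddlemetrics.functional import accuracy, stat_scores

preds = B.tensor([0, 2, 1, 3])
target = B.tensor([0, 1, 2, 3])
acc = accuracy(preds, target)                        # tensor(0.5000), as in the accuracy docstring example
scores = stat_scores(preds, target, reduce="micro")  # [tp, fp, tn, fn, support]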
+from paddlemetrics.functional.audio.pesq import pesq # noqa: F401 +from paddlemetrics.functional.audio.pit import pit, pit_permutate # noqa: F401 +from paddlemetrics.functional.audio.si_sdr import si_sdr # noqa: F401 +from paddlemetrics.functional.audio.si_snr import si_snr # noqa: F401 +from paddlemetrics.functional.audio.snr import snr # noqa: F401 +from paddlemetrics.functional.audio.stoi import stoi # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/functional/audio/pesq.py b/RE/paddlemetric/src/paddlemetrics/functional/audio/pesq.py new file mode 100644 index 00000000..ab81723d --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/audio/pesq.py @@ -0,0 +1,100 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np + +from paddlemetrics.utilities.imports import _PESQ_AVAILABLE + +if _PESQ_AVAILABLE: + import pesq as pesq_backend +else: + pesq_backend = None +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def pesq(preds: Tensor, target: Tensor, fs: int, mode: str, keep_same_device: bool = False) -> Tensor: + r"""PESQ (Perceptual Evaluation of Speech Quality) + + This is a wrapper for the ``pesq`` package [1]. Note that input will be moved to `cpu` + to perform the metric calculation. + + .. note:: using this metrics requires you to have ``pesq`` install. Either install as ``pip install + paddlemetrics[audio]`` or ``pip install pesq`` + + Args: + preds: + shape ``[...,time]`` + target: + shape ``[...,time]`` + fs: + sampling frequency, should be 16000 or 8000 (Hz) + mode: + 'wb' (wide-band) or 'nb' (narrow-band) + keep_same_device: + whether to move the pesq value to the device of preds + + Returns: + pesq value of shape [...] + + Raises: + ValueError: + If ``peqs`` package is not installed + ValueError: + If ``fs`` is not either ``8000`` or ``16000`` + ValueError: + If ``mode`` is not either ``"wb"`` or ``"nb"`` + + Example: + >>> from paddlemetrics.functional.audio import pesq + >>> import torchapi as B + >>> g = B.manual_seed(1) + >>> preds = B.randn(8000) + >>> target = B.randn(8000) + >>> pesq(preds, target, 8000, 'nb') + tensor(2.2076) + >>> pesq(preds, target, 16000, 'wb') + tensor(1.7359) + + References: + [1] https://github.com/ludlows/python-pesq + """ + if not _PESQ_AVAILABLE: + raise ValueError( + "PESQ metric requires that pesq is installed." 
+ "Either install as `pip install paddlemetrics[audio]` or `pip install pesq`" + ) + if fs not in (8000, 16000): + raise ValueError(f"Expected argument `fs` to either be 8000 or 16000 but got {fs}") + if mode not in ("wb", "nb"): + raise ValueError(f"Expected argument `mode` to either be 'wb' or 'nb' but got {mode}") + _check_same_shape(preds, target) + + if preds.ndim == 1: + pesq_val_np = pesq_backend.pesq(fs, target.detach().cpu().numpy(), preds.detach().cpu().numpy(), mode) + pesq_val = B.tensor(pesq_val_np) + else: + preds_np = preds.reshape(-1, preds.shape[-1]).detach().cpu().numpy() + target_np = target.reshape(-1, preds.shape[-1]).detach().cpu().numpy() + pesq_val_np = np.empty(shape=(preds_np.shape[0])) + for b in range(preds_np.shape[0]): + pesq_val_np[b] = pesq_backend.pesq(fs, target_np[b, :], preds_np[b, :], mode) + pesq_val = B.from_numpy(pesq_val_np) + pesq_val = pesq_val.reshape(preds.shape[:-1]) + + if keep_same_device: + pesq_val = pesq_val.to(preds.device) + + return pesq_val diff --git a/RE/paddlemetric/src/paddlemetrics/functional/audio/pit.py b/RE/paddlemetric/src/paddlemetrics/functional/audio/pit.py new file mode 100644 index 00000000..3ca729a2 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/audio/pit.py @@ -0,0 +1,206 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import warnings +from itertools import permutations +from typing import Any, Callable, Dict, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape +from paddlemetrics.utilities.imports import _SCIPY_AVAILABLE + +# _ps_dict: cache of permutations +# it's necessary to cache it, otherwise it will consume a large amount of time +_ps_dict: dict = {} # _ps_dict[str(spk_num)+str(device)] = permutations + + +def _find_best_perm_by_linear_sum_assignment( + metric_mtx: B.Tensor, + eval_func: Union[B.min, B.max], +) -> Tuple[Tensor, Tensor]: + """Solves the linear sum assignment problem using scipy, and returns the best metric values and the + corresponding permutations. + + Args: + metric_mtx: + the metric matrix, shape [batch_size, spk_num, spk_num] + eval_func: + the function to reduce the metric values of different the permutations + + Returns: + best_metric: + shape [batch] + best_perm: + shape [batch, spk] + """ + from scipy.optimize import linear_sum_assignment + + mmtx = metric_mtx.detach().cpu() + best_perm = B.tensor([linear_sum_assignment(pwm, eval_func == B.max)[1] for pwm in mmtx]) + best_perm = best_perm.to(metric_mtx.device) + best_metric = B.gather(metric_mtx, 2, best_perm[:, :, None]).mean([-1, -2]) + return best_metric, best_perm # shape [batch], shape [batch, spk] + + +def _find_best_perm_by_exhuastive_method( + metric_mtx: B.Tensor, + eval_func: Union[B.min, B.max], +) -> Tuple[Tensor, Tensor]: + """Solves the linear sum assignment problem using exhuastive method, i.e. 
exhuastively calculates the metric + values of all possible permutations, and returns the best metric values and the corresponding permutations. + + Args: + metric_mtx: + the metric matrix, shape [batch_size, spk_num, spk_num] + eval_func: + the function to reduce the metric values of different the permutations + + Returns: + best_metric: + shape [batch] + best_perm: + shape [batch, spk] + """ + # create/read/cache the permutations and its indexes + # reading from cache would be much faster than creating in CPU then moving to GPU + batch_size, spk_num = metric_mtx.shape[:2] + key = str(spk_num) + str(metric_mtx.device) + if key not in _ps_dict: + # ps: all the permutations, shape [spk_num, perm_num] + # ps: In i-th permutation, the predcition corresponds to the j-th target is ps[j,i] + ps = B.tensor(list(permutations(range(spk_num))), device=metric_mtx.device).T + _ps_dict[key] = ps + else: + ps = _ps_dict[key] # all the permutations, shape [spk_num, perm_num] + + # find the metric of each permutation + perm_num = ps.shape[-1] + # shape [batch_size, spk_num, perm_num] + bps = ps[None, ...].expand(batch_size, spk_num, perm_num) + # shape [batch_size, spk_num, perm_num] + metric_of_ps_details = B.gather(metric_mtx, 2, bps) + # shape [batch_size, perm_num] + metric_of_ps = metric_of_ps_details.mean(dim=1) + + # find the best metric and best permutation + best_metric, best_indexes = eval_func(metric_of_ps, dim=1) + best_indexes = best_indexes.detach() + best_perm = ps.T[best_indexes, :] + return best_metric, best_perm # shape [batch], shape [batch, spk] + + +def pit( + preds: B.Tensor, target: B.Tensor, metric_func: Callable, eval_func: str = "max", **kwargs: Dict[str, Any] +) -> Tuple[Tensor, Tensor]: + """Permutation invariant training (PIT). The PIT implements the famous Permutation Invariant Training method. + + [1] in speech separation field in order to calculate audio metrics in a permutation invariant way. + + Args: + preds: + shape [batch, spk, ...] + target: + shape [batch, spk, ...] + metric_func: + a metric function accept a batch of target and estimate, + i.e. metric_func(preds[:, i, ...], target[:, j, ...]), and returns a batch of metric tensors [batch] + eval_func: + the function to find the best permutation, can be 'min' or 'max', + i.e. the smaller the better or the larger the better. 
+ kwargs: + additional args for metric_func + + Returns: + best_metric of shape [batch], + best_perm of shape [batch] + + Example: + >>> from paddlemetrics.functional.audio import si_sdr + >>> # [batch, spk, time] + >>> preds = B.tensor([[[-0.0579, 0.3560, -0.9604], [-0.1719, 0.3205, 0.2951]]]) + >>> target = B.tensor([[[ 1.0958, -0.1648, 0.5228], [-0.4100, 1.1942, -0.5103]]]) + >>> best_metric, best_perm = pit(preds, target, si_sdr, 'max') + >>> best_metric + tensor([-5.1091]) + >>> best_perm + tensor([[0, 1]]) + >>> pit_permutate(preds, best_perm) + tensor([[[-0.0579, 0.3560, -0.9604], + [-0.1719, 0.3205, 0.2951]]]) + + Reference: + [1] `Permutation Invariant Training of Deep Models`_ + """ + _check_same_shape(preds, target) + if eval_func not in ["max", "min"]: + raise ValueError(f'eval_func can only be "max" or "min" but got {eval_func}') + if target.ndim < 2: + raise ValueError(f"Inputs must be of shape [batch, spk, ...], got {target.shape} and {preds.shape} instead") + + # calculate the metric matrix + batch_size, spk_num = target.shape[0:2] + metric_mtx = None + for target_idx in range(spk_num): # we have spk_num speeches in target in each sample + for preds_idx in range(spk_num): # we have spk_num speeches in preds in each sample + if metric_mtx is not None: + metric_mtx[:, target_idx, preds_idx] = metric_func( + preds[:, preds_idx, ...], target[:, target_idx, ...], **kwargs + ) + else: + first_ele = metric_func(preds[:, preds_idx, ...], target[:, target_idx, ...], **kwargs) + metric_mtx = B.empty((batch_size, spk_num, spk_num), dtype=first_ele.dtype, device=first_ele.device) + metric_mtx[:, target_idx, preds_idx] = first_ele + + # find best + op = B.max if eval_func == "max" else B.min + if spk_num < 3 or not _SCIPY_AVAILABLE: + if spk_num >= 3 and not _SCIPY_AVAILABLE: + warnings.warn( + f"In pit metric for speaker-num {spk_num}>3, we recommend installing scipy for better performance" + ) + + best_metric, best_perm = _find_best_perm_by_exhuastive_method(metric_mtx, op) + else: + best_metric, best_perm = _find_best_perm_by_linear_sum_assignment(metric_mtx, op) + + return best_metric, best_perm + + +def pit_permutate(preds: Tensor, perm: Tensor) -> Tensor: + """permutate estimate according to perm. + + Args: + preds (Tensor): the estimates you want to permutate, shape [batch, spk, ...] + perm (Tensor): the permutation returned from pit, shape [batch, spk] + + Returns: + Tensor: the permutated version of estimate + + Example: + >>> from paddlemetrics.functional.audio import si_sdr + >>> # [batch, spk, time] + >>> preds = B.tensor([[[-0.0579, 0.3560, -0.9604], [-0.1719, 0.3205, 0.2951]]]) + >>> target = B.tensor([[[ 1.0958, -0.1648, 0.5228], [-0.4100, 1.1942, -0.5103]]]) + >>> best_metric, best_perm = pit(preds, target, si_sdr, 'max') + >>> best_metric + tensor([-5.1091]) + >>> best_perm + tensor([[0, 1]]) + >>> pit_permutate(preds, best_perm) + tensor([[[-0.0579, 0.3560, -0.9604], + [-0.1719, 0.3205, 0.2951]]]) + """ + preds_pmted = B.stack([B.index_select(pred, 0, p) for pred, p in zip(preds, perm)]) + return preds_pmted diff --git a/RE/paddlemetric/src/paddlemetrics/functional/audio/si_sdr.py b/RE/paddlemetric/src/paddlemetrics/functional/audio/si_sdr.py new file mode 100644 index 00000000..66eb9e3a --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/audio/si_sdr.py @@ -0,0 +1,64 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def si_sdr(preds: Tensor, target: Tensor, zero_mean: bool = False) -> Tensor: + """Calculates Scale-invariant signal-to-distortion ratio (SI-SDR) metric. The SI-SDR value is in general + considered an overall measure of how good a source sound. + + Args: + preds: + shape ``[...,time]`` + target: + shape ``[...,time]`` + zero_mean: + If to zero mean target and preds or not + + Returns: + si-sdr value of shape [...] + + Example: + >>> from paddlemetrics.functional.audio import si_sdr + >>> target = B.tensor([3.0, -0.5, 2.0, 7.0]) + >>> preds = B.tensor([2.5, 0.0, 2.0, 8.0]) + >>> si_sdr_val = si_sdr(preds, target) + >>> si_sdr_val + tensor(18.4030) + + References: + [1] Le Roux, Jonathan, et al. "SDR half-baked or well done." IEEE International Conference on Acoustics, Speech + and Signal Processing (ICASSP) 2019. + """ + _check_same_shape(preds, target) + EPS = B.finfo(preds.dtype).eps + + if zero_mean: + target = target - B.mean(target, dim=-1, keepdim=True) + preds = preds - B.mean(preds, dim=-1, keepdim=True) + + alpha = (B.sum(preds * target, dim=-1, keepdim=True) + EPS) / ( + B.sum(target ** 2, dim=-1, keepdim=True) + EPS + ) + target_scaled = alpha * target + + noise = target_scaled - preds + + si_sdr_value = (B.sum(target_scaled ** 2, dim=-1) + EPS) / (B.sum(noise ** 2, dim=-1) + EPS) + si_sdr_value = 10 * B.log10(si_sdr_value) + + return si_sdr_value diff --git a/RE/paddlemetric/src/paddlemetrics/functional/audio/si_snr.py b/RE/paddlemetric/src/paddlemetrics/functional/audio/si_snr.py new file mode 100644 index 00000000..abddf039 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/audio/si_snr.py @@ -0,0 +1,46 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.audio.si_sdr import si_sdr + + +def si_snr(preds: Tensor, target: Tensor) -> Tensor: + """Scale-invariant signal-to-noise ratio (SI-SNR). + + Args: + preds: + shape ``[...,time]`` + target: + shape ``[...,time]`` + + Returns: + si-snr value of shape [...] + + Example: + >>> import torchapi as B + >>> from paddlemetrics.functional.audio import si_snr + >>> target = B.tensor([3.0, -0.5, 2.0, 7.0]) + >>> preds = B.tensor([2.5, 0.0, 2.0, 8.0]) + >>> si_snr_val = si_snr(preds, target) + >>> si_snr_val + tensor(15.0918) + + References: + [1] Y. Luo and N. 
Mesgarani, "TaSNet: Time-Domain Audio Separation Network for Real-Time, Single-Channel Speech + Separation," 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2018, pp. + 696-700, doi: 10.1109/ICASSP.2018.8462116. + """ + + return si_sdr(target=target, preds=preds, zero_mean=True) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/audio/snr.py b/RE/paddlemetric/src/paddlemetrics/functional/audio/snr.py new file mode 100644 index 00000000..8c54128b --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/audio/snr.py @@ -0,0 +1,66 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def snr(preds: Tensor, target: Tensor, zero_mean: bool = False) -> Tensor: + r"""Signal-to-noise ratio (SNR_): + + .. math:: + \text{SNR} = \frac{P_{signal}}{P_{noise}} + + where :math:`P` denotes the power of each signal. The SNR metric compares the level + of the desired signal to the level of background noise. Therefore, a high value of + SNR means that the audio is clear. + + Args: + preds: + shape ``[...,time]`` + target: + shape ``[...,time]`` + zero_mean: + if to zero mean target and preds or not + + Returns: + snr value of shape [...] + + Example: + >>> from paddlemetrics.functional.audio import snr + >>> target = B.tensor([3.0, -0.5, 2.0, 7.0]) + >>> preds = B.tensor([2.5, 0.0, 2.0, 8.0]) + >>> snr_val = snr(preds, target) + >>> snr_val + tensor(16.1805) + + References: + [1] Le Roux, Jonathan, et al. "SDR half-baked or well done." IEEE International Conference on Acoustics, Speech + and Signal Processing (ICASSP) 2019. + + """ + _check_same_shape(preds, target) + EPS = B.finfo(preds.dtype).eps + + if zero_mean: + target = target - B.mean(target, dim=-1, keepdim=True) + preds = preds - B.mean(preds, dim=-1, keepdim=True) + + noise = target - preds + + snr_value = (B.sum(target ** 2, dim=-1) + EPS) / (B.sum(noise ** 2, dim=-1) + EPS) + snr_value = 10 * B.log10(snr_value) + + return snr_value diff --git a/RE/paddlemetric/src/paddlemetrics/functional/audio/stoi.py b/RE/paddlemetric/src/paddlemetrics/functional/audio/stoi.py new file mode 100644 index 00000000..4c1f5806 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/audio/stoi.py @@ -0,0 +1,105 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
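# --- Illustrative shape sketch (editor's note, not part of this changeset) ---
# The audio metrics above share one convention: inputs of shape [..., time]
# produce a value of shape [...]. Assumes the backend alias ``B``; the random
# signals are placeholders, not meaningful audio.
import paddleext.torchapi as B
from paddlemetrics.functional.audio import si_sdr, si_snr, snr

preds = B.randn(2, 4, 8000)    # e.g. [batch, speaker, time]
target = B.randn(2, 4, 8000)
assert tuple(snr(preds, target).shape) == (2, 4)
assert tuple(si_sdr(preds, target, zero_mean=True).shape) == (2, 4)
assert tuple(si_snr(preds, target).shape) == (2, 4)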
+import numpy as np +import paddleext.torchapi as B + +from paddlemetrics.utilities.imports import _PYSTOI_AVAILABLE + +if _PYSTOI_AVAILABLE: + from pystoi import stoi as stoi_backend +else: + stoi_backend = None +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def stoi(preds: Tensor, target: Tensor, fs: int, extended: bool = False, keep_same_device: bool = False) -> Tensor: + r"""STOI (Short Term Objective Intelligibility, see [2,3]), a wrapper for the pystoi package [1]. + Note that input will be moved to `cpu` to perform the metric calculation. + + Intelligibility measure which is highly correlated with the intelligibility of degraded speech signals, e.g., due + to additive noise, single/multi-channel noise reduction, binary masking and vocoded speech as in CI simulations. + The STOI-measure is intrusive, i.e., a function of the clean and degraded speech signals. STOI may be a good + alternative to the speech intelligibility index (SII) or the speech transmission index (STI), when you are + interested in the effect of nonlinear processing to noisy speech, e.g., noise reduction, binary masking algorithms, + on speech intelligibility. Description taken from [Cees Taal's website](http://www.ceestaal.nl/code/). + + .. note:: using this metrics requires you to have ``pystoi`` install. Either install as ``pip install + paddlemetrics[audio]`` or ``pip install pystoi`` + + Args: + preds: + shape ``[...,time]`` + target: + shape ``[...,time]`` + fs: + sampling frequency (Hz) + extended: + whether to use the extended STOI described in [4] + keep_same_device: + whether to move the stoi value to the device of preds + + Returns: + stoi value of shape [...] + + Raises: + ValueError: + If ``pystoi`` package is not installed + + Example: + >>> from paddlemetrics.functional.audio import stoi + >>> import torchapi as B + >>> g = B.manual_seed(1) + >>> preds = B.randn(8000) + >>> target = B.randn(8000) + >>> stoi(preds, target, 8000).float() + tensor(-0.0100) + + References: + [1] https://github.com/mpariente/pystoi + + [2] C.H.Taal, R.C.Hendriks, R.Heusdens, J.Jensen 'A Short-Time Objective Intelligibility Measure for + Time-Frequency Weighted Noisy Speech', ICASSP 2010, Texas, Dallas. + + [3] C.H.Taal, R.C.Hendriks, R.Heusdens, J.Jensen 'An Algorithm for Intelligibility Prediction of + Time-Frequency Weighted Noisy Speech', IEEE Transactions on Audio, Speech, and Language Processing, 2011. + + [4] J. Jensen and C. H. Taal, 'An Algorithm for Predicting the Intelligibility of Speech Masked by Modulated + Noise Maskers', IEEE Transactions on Audio, Speech and Language Processing, 2016. + + """ + if not _PYSTOI_AVAILABLE: + raise ValueError( + "STOI metric requires that pystoi is installed." 
+ "Either install as `pip install paddlemetrics[audio]` or `pip install pystoi`" + ) + _check_same_shape(preds, target) + + if len(preds.shape) == 1: + stoi_val_np = stoi_backend(target.detach().cpu().numpy(), preds.detach().cpu().numpy(), fs, extended) + stoi_val = B.tensor(stoi_val_np) + else: + preds_np = preds.reshape(-1, preds.shape[-1]).detach().cpu().numpy() + target_np = target.reshape(-1, preds.shape[-1]).detach().cpu().numpy() + stoi_val_np = np.empty(shape=(preds_np.shape[0])) + for b in range(preds_np.shape[0]): + stoi_val_np[b] = stoi_backend(target_np[b, :], preds_np[b, :], fs, extended) + stoi_val = B.from_numpy(stoi_val_np) + stoi_val = stoi_val.reshape(preds.shape[:-1]) + + if keep_same_device: + stoi_val = stoi_val.to(preds.device) + + return stoi_val diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/__init__.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/__init__.py new file mode 100644 index 00000000..a03982c8 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/__init__.py @@ -0,0 +1,32 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddlemetrics.functional.classification.accuracy import accuracy # noqa: F401 +from paddlemetrics.functional.classification.auc import auc # noqa: F401 +from paddlemetrics.functional.classification.auroc import auroc # noqa: F401 +from paddlemetrics.functional.classification.average_precision import average_precision # noqa: F401 +from paddlemetrics.functional.classification.calibration_error import calibration_error # noqa: F401 +from paddlemetrics.functional.classification.cohen_kappa import cohen_kappa # noqa: F401 +from paddlemetrics.functional.classification.confusion_matrix import confusion_matrix # noqa: F401 +from paddlemetrics.functional.classification.dice import dice_score # noqa: F401 +from paddlemetrics.functional.classification.f_beta import f1, fbeta # noqa: F401 +from paddlemetrics.functional.classification.hamming_distance import hamming_distance # noqa: F401 +from paddlemetrics.functional.classification.hinge import hinge # noqa: F401 +from paddlemetrics.functional.classification.iou import iou # noqa: F401 +from paddlemetrics.functional.classification.kl_divergence import kl_divergence # noqa: F401 +from paddlemetrics.functional.classification.matthews_corrcoef import matthews_corrcoef # noqa: F401 +from paddlemetrics.functional.classification.precision_recall import precision, precision_recall, recall # noqa: F401 +from paddlemetrics.functional.classification.precision_recall_curve import precision_recall_curve # noqa: F401 +from paddlemetrics.functional.classification.roc import roc # noqa: F401 +from paddlemetrics.functional.classification.specificity import specificity # noqa: F401 +from paddlemetrics.functional.classification.stat_scores import stat_scores # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/accuracy.py 
b/RE/paddlemetric/src/paddlemetrics/functional/classification/accuracy.py new file mode 100644 index 00000000..44c89fa9 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/accuracy.py @@ -0,0 +1,418 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional, Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.classification.stat_scores import _reduce_stat_scores, _stat_scores_update +from paddlemetrics.utilities.checks import _check_classification_inputs, _input_format_classification, _input_squeeze +from paddlemetrics.utilities.enums import AverageMethod, DataType, MDMCAverageMethod + + +def _check_subset_validity(mode: DataType) -> bool: + """Checks input mode is valid.""" + return mode in (DataType.MULTILABEL, DataType.MULTIDIM_MULTICLASS) + + +def _mode( + preds: Tensor, + target: Tensor, + threshold: float, + top_k: Optional[int], + num_classes: Optional[int], + multiclass: Optional[bool], +) -> DataType: + """Finds the mode of the input tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + threshold: Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the + case of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + top_k: Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. + num_classes: Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. + + Example: + >>> target = B.tensor([0, 1, 2, 3]) + >>> preds = B.tensor([0, 2, 1, 3]) + >>> _mode(preds, target, 0.5, None, None, None) + + """ + + mode = _check_classification_inputs( + preds, target, threshold=threshold, top_k=top_k, num_classes=num_classes, multiclass=multiclass + ) + return mode + + +def _accuracy_update( + preds: Tensor, + target: Tensor, + reduce: Optional[str], + mdmc_reduce: Optional[str], + threshold: float, + num_classes: Optional[int], + top_k: Optional[int], + multiclass: Optional[bool], + ignore_index: Optional[int], + mode: DataType, +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + """Updates and returns stat scores (true positives, false positives, true negatives, false negatives) required + to compute accuracy. + + Args: + preds: Predicted tensor + target: Ground truth tensor + reduce: Defines the reduction that is applied. + mdmc_reduce: Defines how the multi-dimensional multi-class inputs are handeled. + threshold: Threshold for transforming probability or logit predictions to binary (0,1) predictions, in + the case of binary or multi-label inputs. + num_classes: Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. 
+ top_k: Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. + multiclass: Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. + ignore_index: Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + mode: Mode of the input tensors + """ + + if mode == DataType.MULTILABEL and top_k: + raise ValueError("You can not use the `top_k` parameter to calculate accuracy for multi-label inputs.") + + preds, target = _input_squeeze(preds, target) + tp, fp, tn, fn = _stat_scores_update( + preds, + target, + reduce=reduce, + mdmc_reduce=mdmc_reduce, + threshold=threshold, + num_classes=num_classes, + top_k=top_k, + multiclass=multiclass, + ignore_index=ignore_index, + ) + return tp, fp, tn, fn + + +def _accuracy_compute( + tp: Tensor, + fp: Tensor, + tn: Tensor, + fn: Tensor, + average: Optional[str], + mdmc_average: Optional[str], + mode: DataType, +) -> Tensor: + """Computes accuracy from stat scores: true positives, false positives, true negatives, false negatives. + + Args: + tp: True positives + fp: False positives + tn: True negatives + fn: False negatives + average: Defines the reduction that is applied. + mdmc_average: Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). + mode: Mode of the input tensors + + Example: + >>> preds = B.tensor([0, 2, 1, 3]) + >>> target = B.tensor([0, 1, 2, 3]) + >>> threshold = 0.5 + >>> reduce = average = 'micro' + >>> mdmc_average = 'global' + >>> mode = _mode(preds, target, threshold, top_k=None, num_classes=None, multiclass=None) + >>> tp, fp, tn, fn = _accuracy_update( + ... preds, + ... target, + ... reduce, + ... mdmc_average, + ... threshold=0.5, + ... num_classes=None, + ... top_k=None, + ... multiclass=None, + ... ignore_index=None, + ... mode=mode) + >>> _accuracy_compute(tp, fp, tn, fn, average, mdmc_average, mode) + tensor(0.5000) + + >>> target = B.tensor([0, 1, 2]) + >>> preds = B.tensor([[0.1, 0.9, 0], [0.3, 0.1, 0.6], [0.2, 0.5, 0.3]]) + >>> top_k, threshold = 2, 0.5 + >>> reduce = average = 'micro' + >>> mdmc_average = 'global' + >>> mode = _mode(preds, target, threshold, top_k, num_classes=None, multiclass=None) + >>> tp, fp, tn, fn = _accuracy_update( + ... preds, + ... target, + ... reduce, + ... mdmc_average, + ... threshold, + ... num_classes=None, + ... top_k=top_k, + ... multiclass=None, + ... ignore_index=None, + ... 
mode=mode) + >>> _accuracy_compute(tp, fp, tn, fn, average, mdmc_average, mode) + tensor(0.6667) + """ + + simple_average = [AverageMethod.MICRO, AverageMethod.SAMPLES] + if (mode == DataType.BINARY and average in simple_average) or mode == DataType.MULTILABEL: + numerator = tp + tn + denominator = tp + tn + fp + fn + else: + numerator = tp + denominator = tp + fn + + if average == AverageMethod.MACRO and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + cond = tp + fp + fn == 0 + numerator = numerator[~cond] + denominator = denominator[~cond] + + if average == AverageMethod.NONE and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + # a class is not present if there exists no TPs, no FPs, and no FNs + meaningless_indeces = B.nonzero((tp | fn | fp) == 0).cpu() + numerator[meaningless_indeces, ...] = -1 + denominator[meaningless_indeces, ...] = -1 + + return _reduce_stat_scores( + numerator=numerator, + denominator=denominator, + weights=None if average != AverageMethod.WEIGHTED else tp + fn, + average=average, + mdmc_average=mdmc_average, + ) + + +def _subset_accuracy_update( + preds: Tensor, + target: Tensor, + threshold: float, + top_k: Optional[int], +) -> Tuple[Tensor, Tensor]: + """Updates and returns variables required to compute subset accuracy. + + Args: + preds: Predicted tensor + target: Ground truth tensor + threshold: Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + top_k: Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. + """ + + preds, target = _input_squeeze(preds, target) + preds, target, mode = _input_format_classification(preds, target, threshold=threshold, top_k=top_k) + + if mode == DataType.MULTILABEL and top_k: + raise ValueError("You can not use the `top_k` parameter to calculate accuracy for multi-label inputs.") + + if mode == DataType.MULTILABEL: + correct = (preds == target).all(dim=1).sum() + total = tensor(target.shape[0], device=target.device) + elif mode == DataType.MULTICLASS: + correct = (preds * target).sum() + total = target.sum() + elif mode == DataType.MULTIDIM_MULTICLASS: + sample_correct = (preds * target).sum(dim=(1, 2)) + correct = (sample_correct == target.shape[2]).sum() + total = tensor(target.shape[0], device=target.device) + else: + correct, total = tensor(0), tensor(0) + + return correct, total + + +def _subset_accuracy_compute(correct: Tensor, total: Tensor) -> Tensor: + """Computes subset accuracy from number of correct observations and total number of observations. + + Args: + correct: Number of correct observations + total: Number of observations + """ + + return correct.float() / total + + +def accuracy( + preds: Tensor, + target: Tensor, + average: str = "micro", + mdmc_average: Optional[str] = "global", + threshold: float = 0.5, + top_k: Optional[int] = None, + subset_accuracy: bool = False, + num_classes: Optional[int] = None, + multiclass: Optional[bool] = None, + ignore_index: Optional[int] = None, +) -> Tensor: + r"""Computes `Accuracy`_ + + .. math:: + \text{Accuracy} = \frac{1}{N}\sum_i^N 1(y_i = \hat{y}_i) + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a + tensor of predictions. 
+ + For multi-class and multi-dimensional multi-class data with probability or logits predictions, the + parameter ``top_k`` generalizes this metric to a Top-K accuracy metric: for each sample the + top-K highest probability or logits items are considered to find the correct label. + + For multi-label and multi-dimensional multi-class inputs, this metric computes the "global" + accuracy by default, which counts all labels or sub-samples separately. This can be + changed to subset accuracy (which requires all labels or sub-samples in the sample to + be correctly predicted) by setting ``subset_accuracy=True``. + + Accepts all input types listed in :ref:`references/modules:input types`. + + Args: + preds: Predictions from model (probabilities, logits or labels) + target: Ground truth labels + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + .. note:: If ``'none'`` and a given class doesn't occur in the `preds` or `target`, + the value for the class will be ``nan``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. 
See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + subset_accuracy: + Whether to compute subset accuracy for multi-label and multi-dimensional + multi-class inputs (has no effect for other input types). + + - For multi-label inputs, if the parameter is set to ``True``, then all labels for + each sample must be correctly predicted for the sample to count as correct. If it + is set to ``False``, then all labels are counted separately - this is equivalent to + flattening inputs beforehand (i.e. ``preds = preds.flatten()`` and same for ``target``). + + - For multi-dimensional multi-class inputs, if the parameter is set to ``True``, then all + sub-sample (on the extra axis) must be correct for the sample to be counted as correct. + If it is set to ``False``, then all sub-samples are counter separately - this is equivalent, + in the case of label predictions, to flattening the inputs beforehand (i.e. + ``preds = preds.flatten()`` and same for ``target``). Note that the ``top_k`` parameter + still applies in both cases, if set. + + Raises: + ValueError: + If ``top_k`` parameter is set for ``multi-label`` inputs. + ValueError: + If ``average`` is none of ``"micro"``, ``"macro"``, ``"weighted"``, ``"samples"``, ``"none"``, ``None``. + ValueError: + If ``mdmc_average`` is not one of ``None``, ``"samplewise"``, ``"global"``. + ValueError: + If ``average`` is set but ``num_classes`` is not provided. + ValueError: + If ``num_classes`` is set + and ``ignore_index`` is not in the range ``[0, num_classes)``. + ValueError: + If ``top_k`` is not an ``integer`` larger than ``0``. 
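Before the docstring examples, a hedged NumPy sketch (not library code) of the two multi-label behaviours controlled by ``subset_accuracy``:

```python
import numpy as np

target = np.array([[1, 0, 1],
                   [0, 1, 1]])
preds = np.array([[1, 0, 0],
                  [0, 1, 1]])

# subset_accuracy=False: every label counts separately (5 of 6 correct)
global_acc = (preds == target).mean()
# subset_accuracy=True: a sample counts only if all its labels are correct
subset_acc = (preds == target).all(axis=1).mean()
print(global_acc, subset_acc)  # ~0.833 and 0.5
```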
+ + Example: + >>> import torchapi as B + >>> from paddlemetrics.functional import accuracy + >>> target = B.tensor([0, 1, 2, 3]) + >>> preds = B.tensor([0, 2, 1, 3]) + >>> accuracy(preds, target) + tensor(0.5000) + + >>> target = B.tensor([0, 1, 2]) + >>> preds = B.tensor([[0.1, 0.9, 0], [0.3, 0.1, 0.6], [0.2, 0.5, 0.3]]) + >>> accuracy(preds, target, top_k=2) + tensor(0.6667) + """ + allowed_average = ["micro", "macro", "weighted", "samples", "none", None] + if average not in allowed_average: + raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") + + if average in ["macro", "weighted", "none", None] and (not num_classes or num_classes < 1): + raise ValueError(f"When you set `average` as {average}, you have to provide the number of classes.") + + allowed_mdmc_average = [None, "samplewise", "global"] + if mdmc_average not in allowed_mdmc_average: + raise ValueError(f"The `mdmc_average` has to be one of {allowed_mdmc_average}, got {mdmc_average}.") + + if num_classes and ignore_index is not None and (not 0 <= ignore_index < num_classes or num_classes == 1): + raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {num_classes} classes") + + if top_k is not None and (not isinstance(top_k, int) or top_k <= 0): + raise ValueError(f"The `top_k` should be an integer larger than 0, got {top_k}") + + preds, target = _input_squeeze(preds, target) + mode = _mode(preds, target, threshold, top_k, num_classes, multiclass) + reduce = "macro" if average in ["weighted", "none", None] else average + + if subset_accuracy and _check_subset_validity(mode): + correct, total = _subset_accuracy_update(preds, target, threshold, top_k) + return _subset_accuracy_compute(correct, total) + tp, fp, tn, fn = _accuracy_update( + preds, target, reduce, mdmc_average, threshold, num_classes, top_k, multiclass, ignore_index, mode + ) + return _accuracy_compute(tp, fp, tn, fn, average, mdmc_average, mode) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/auc.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/auc.py new file mode 100644 index 00000000..0e2fddb3 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/auc.py @@ -0,0 +1,133 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + + +def _auc_update(x: Tensor, y: Tensor) -> Tuple[Tensor, Tensor]: + """Updates and returns variables required to compute area under the curve. Checks if the 2 input tenseor have + the same number of elements and if they are 1d. 
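(Sketch, not library code.) The AUC helpers in this module reduce to the trapezoidal rule, with ``_auc_compute_without_check`` delegating to ``B.trapz``; for orientation, the same example values through NumPy, assuming ``x`` is already increasing:

```python
# NumPy equivalent of the trapezoidal-rule AUC evaluated below via B.trapz.
import numpy as np

x = np.array([0.0, 1.0, 2.0, 3.0])
y = np.array([0.0, 1.0, 2.0, 2.0])
print(np.trapz(y, x))  # 4.0, matching the doctests further below
```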
+ + Args: + x: x-coordinates + y: y-coordinates + """ + + if x.ndim > 1: + x = x.squeeze() + + if y.ndim > 1: + y = y.squeeze() + + if x.ndim > 1 or y.ndim > 1: + raise ValueError( + f"Expected both `x` and `y` tensor to be 1d, but got tensors with dimension {x.ndim} and {y.ndim}" + ) + if x.numel() != y.numel(): + raise ValueError( + f"Expected the same number of elements in `x` and `y` tensor but received {x.numel()} and {y.numel()}" + ) + return x, y + + +def _auc_compute_without_check(x: Tensor, y: Tensor, direction: float) -> Tensor: + """Computes area under the curve using the trapezoidal rule. Assumes increasing or decreasing order of `x`. + + Args: + x: x-coordinates, must be either increasing or decreasing + y: y-coordinates + direction: 1 if increaing, -1 if decreasing + + Example: + >>> x = B.tensor([0, 1, 2, 3]) + >>> y = B.tensor([0, 1, 2, 2]) + >>> x, y = _auc_update(x, y) + >>> _auc_compute_without_check(x, y, direction=1.0) + tensor(4.) + """ + + with B.no_grad(): + auc_: Tensor = B.trapz(y, x) * direction + return auc_ + + +def _auc_compute(x: Tensor, y: Tensor, reorder: bool = False) -> Tensor: + """Computes area under the curve using the trapezoidal rule. Checks for increasing or decreasing order of `x`. + + Args: + x: x-coordinates, must be either increasing or decreasing + y: y-coordinates + reorder: if True, will reorder the arrays to make it either increasing or decreasing + + Example: + >>> x = B.tensor([0, 1, 2, 3]) + >>> y = B.tensor([0, 1, 2, 2]) + >>> x, y = _auc_update(x, y) + >>> _auc_compute(x, y) + tensor(4.) + >>> _auc_compute(x, y, reorder=True) + tensor(4.) + """ + + with B.no_grad(): + if reorder: + # TODO: include stable=True arg when pytorch v1.9 is released + x, x_idx = B.sort(x) + y = y[x_idx] + + dx = x[1:] - x[:-1] + if (dx < 0).any(): + if (dx <= 0).all(): + direction = -1.0 + else: + raise ValueError( + "The `x` tensor is neither increasing or decreasing. Try setting the reorder argument to `True`." + ) + else: + direction = 1.0 + return _auc_compute_without_check(x, y, direction) + + +def auc(x: Tensor, y: Tensor, reorder: bool = False) -> Tensor: + """Computes Area Under the Curve (AUC) using the trapezoidal rule. + + Args: + x: x-coordinates, must be either increasing or decreasing + y: y-coordinates + reorder: if True, will reorder the arrays to make it either increasing or decreasing + + Return: + Tensor containing AUC score (float) + + Raises: + ValueError: + If both ``x`` and ``y`` tensors are not ``1d``. + ValueError: + If both ``x`` and ``y`` don't have the same numnber of elements. + ValueError: + If ``x`` tesnsor is neither increasing or decreasing. + + Example: + >>> from paddlemetrics.functional import auc + >>> x = B.tensor([0, 1, 2, 3]) + >>> y = B.tensor([0, 1, 2, 2]) + >>> auc(x, y) + tensor(4.) + >>> auc(x, y, reorder=True) + tensor(4.) + """ + x, y = _auc_update(x, y) + return _auc_compute(x, y, reorder=reorder) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/auroc.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/auroc.py new file mode 100644 index 00000000..a393f20e --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/auroc.py @@ -0,0 +1,257 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import warnings +from typing import Optional, Sequence, Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.classification.auc import _auc_compute_without_check +from paddlemetrics.functional.classification.roc import roc +from paddlemetrics.utilities.checks import _input_format_classification +from paddlemetrics.utilities.enums import AverageMethod, DataType +from paddlemetrics.utilities.imports import _TORCH_LOWER_1_6 + + +def _auroc_update(preds: Tensor, target: Tensor) -> Tuple[Tensor, Tensor, DataType]: + """Updates and returns variables required to compute Area Under the Receiver Operating Characteristic Curve. + Validates the inputs and returns the mode of the inputs. + + Args: + preds: Predicted tensor + target: Ground truth tensor + """ + + # use _input_format_classification for validating the input and get the mode of data + _, _, mode = _input_format_classification(preds, target) + + if mode == "multi class multi dim": + n_classes = preds.shape[1] + preds = preds.transpose(0, 1).reshape(n_classes, -1).transpose(0, 1) + target = target.flatten() + if mode == "multi-label" and preds.ndim > 2: + n_classes = preds.shape[1] + preds = preds.transpose(0, 1).reshape(n_classes, -1).transpose(0, 1) + target = target.transpose(0, 1).reshape(n_classes, -1).transpose(0, 1) + + return preds, target, mode + + +def _auroc_compute( + preds: Tensor, + target: Tensor, + mode: DataType, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + average: Optional[str] = "macro", + max_fpr: Optional[float] = None, + sample_weights: Optional[Sequence] = None, +) -> Tensor: + """Computes Area Under the Receiver Operating Characteristic Curve. + + Args: + preds: predictions from model (logits or probabilities) + target: Ground truth labels + mode: 'multi class multi dim' or 'multi-label' or 'binary' + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. + Should be set to ``None`` for binary problems + average: Defines the reduction that is applied to the output: + max_fpr: If not ``None``, calculates standardized partial AUC over the + range [0, max_fpr]. Should be a float between 0 and 1. + sample_weights: sample weights for each data point + + Example: + >>> # binary case + >>> preds = B.tensor([0.13, 0.26, 0.08, 0.19, 0.34]) + >>> target = B.tensor([0, 0, 1, 1, 1]) + >>> preds, target, mode = _auroc_update(preds, target) + >>> _auroc_compute(preds, target, mode, pos_label=1) + tensor(0.5000) + + >>> # multiclass case + >>> preds = B.tensor([[0.90, 0.05, 0.05], + ... [0.05, 0.90, 0.05], + ... [0.05, 0.05, 0.90], + ... [0.85, 0.05, 0.10], + ... 
[0.10, 0.10, 0.80]]) + >>> target = B.tensor([0, 1, 1, 2, 2]) + >>> preds, target, mode = _auroc_update(preds, target) + >>> _auroc_compute(preds, target, mode, num_classes=3) + tensor(0.7778) + """ + + # binary mode override num_classes + if mode == DataType.BINARY: + num_classes = 1 + + # check max_fpr parameter + if max_fpr is not None: + if not isinstance(max_fpr, float) and 0 < max_fpr <= 1: + raise ValueError(f"`max_fpr` should be a float in range (0, 1], got: {max_fpr}") + + if _TORCH_LOWER_1_6: + raise RuntimeError( + "`max_fpr` argument requires `B.bucketize` which" " is not available below PyTorch version 1.6" + ) + + # max_fpr parameter is only support for binary + if mode != DataType.BINARY: + raise ValueError( + f"Partial AUC computation not available in" + f" multilabel/multiclass setting, 'max_fpr' must be" + f" set to `None`, received `{max_fpr}`." + ) + + # calculate fpr, tpr + if mode == DataType.MULTILABEL: + if average == AverageMethod.MICRO: + fpr, tpr, _ = roc(preds.flatten(), target.flatten(), 1, pos_label, sample_weights) + elif num_classes: + # for multilabel we iteratively evaluate roc in a binary fashion + output = [ + roc(preds[:, i], target[:, i], num_classes=1, pos_label=1, sample_weights=sample_weights) + for i in range(num_classes) + ] + fpr = [o[0] for o in output] + tpr = [o[1] for o in output] + else: + raise ValueError("Detected input to be `multilabel` but you did not provide `num_classes` argument") + else: + if mode != DataType.BINARY: + if num_classes is None: + raise ValueError("Detected input to `multiclass` but you did not provide `num_classes` argument") + if average == AverageMethod.WEIGHTED and len(B.unique(target)) < num_classes: + # If one or more classes has 0 observations, we should exclude them, as its weight will be 0 + target_bool_mat = B.zeros((len(target), num_classes), dtype=bool) + target_bool_mat[B.arange(len(target)), target.long()] = 1 + class_observed = target_bool_mat.sum(axis=0) > 0 + for c in range(num_classes): + if not class_observed[c]: + warnings.warn(f"Class {c} had 0 observations, omitted from AUROC calculation", UserWarning) + preds = preds[:, class_observed] + target = target_bool_mat[:, class_observed] + target = B.where(target)[1] + num_classes = class_observed.sum() + if num_classes == 1: + raise ValueError("Found 1 non-empty class in `multiclass` AUROC calculation") + fpr, tpr, _ = roc(preds, target, num_classes, pos_label, sample_weights) + + # calculate standard roc auc score + if max_fpr is None or max_fpr == 1: + if mode == DataType.MULTILABEL and average == AverageMethod.MICRO: + pass + elif num_classes != 1: + # calculate auc scores per class + auc_scores = [_auc_compute_without_check(x, y, 1.0) for x, y in zip(fpr, tpr)] + + # calculate average + if average == AverageMethod.NONE: + return tensor(auc_scores) + if average == AverageMethod.MACRO: + return B.mean(B.stack(auc_scores)) + if average == AverageMethod.WEIGHTED: + if mode == DataType.MULTILABEL: + support = B.sum(target, dim=0) + else: + support = B.bincount(target.flatten(), minlength=num_classes) + return B.sum(B.stack(auc_scores) * support / support.sum()) + + allowed_average = (AverageMethod.NONE.value, AverageMethod.MACRO.value, AverageMethod.WEIGHTED.value) + raise ValueError( + f"Argument `average` expected to be one of the following:" f" {allowed_average} but got {average}" + ) + + return _auc_compute_without_check(fpr, tpr, 1.0) + + _device = fpr.device if isinstance(fpr, Tensor) else fpr[0].device + max_area: Tensor = tensor(max_fpr, 
device=_device) + # Add a single point at max_fpr and interpolate its tpr value + stop = B.bucketize(max_area, fpr, out_int32=True, right=True) + weight = (max_area - fpr[stop - 1]) / (fpr[stop] - fpr[stop - 1]) + interp_tpr: Tensor = B.lerp(tpr[stop - 1], tpr[stop], weight) + tpr = B.cat([tpr[:stop], interp_tpr.view(1)]) + fpr = B.cat([fpr[:stop], max_area.view(1)]) + + # Compute partial AUC + partial_auc = _auc_compute_without_check(fpr, tpr, 1.0) + + # McClish correction: standardize result to be 0.5 if non-discriminant and 1 if maximal + min_area: Tensor = 0.5 * max_area ** 2 + return 0.5 * (1 + (partial_auc - min_area) / (max_area - min_area)) + + +def auroc( + preds: Tensor, + target: Tensor, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + average: Optional[str] = "macro", + max_fpr: Optional[float] = None, + sample_weights: Optional[Sequence] = None, +) -> Tensor: + """Compute Area Under the Receiver Operating Characteristic Curve (`ROC AUC`_) + + Args: + preds: predictions from model (logits or probabilities) + target: Ground truth labels + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + average: + - ``'micro'`` computes metric globally. Only works for multilabel problems + - ``'macro'`` computes metric for each class and uniformly averages them + - ``'weighted'`` computes metric for each class and does a weighted-average, + where each class is weighted by their support (accounts for class imbalance) + - ``None`` computes and returns the metric per class + max_fpr: + If not ``None``, calculates standardized partial AUC over the + range [0, max_fpr]. Should be a float between 0 and 1. + sample_weights: sample weights for each data point + + Raises: + ValueError: + If ``max_fpr`` is not a ``float`` in the range ``(0, 1]``. + RuntimeError: + If ``PyTorch version`` is ``below 1.6`` since max_fpr requires `B.bucketize` + which is not available below 1.6. + ValueError: + If ``max_fpr`` is not set to ``None`` and the mode is ``not binary`` + since partial AUC computation is not available in multilabel/multiclass. + ValueError: + If ``average`` is none of ``None``, ``"macro"`` or ``"weighted"``. + + Example (binary case): + >>> from paddlemetrics.functional import auroc + >>> preds = B.tensor([0.13, 0.26, 0.08, 0.19, 0.34]) + >>> target = B.tensor([0, 0, 1, 1, 1]) + >>> auroc(preds, target, pos_label=1) + tensor(0.5000) + + Example (multiclass case): + >>> preds = B.tensor([[0.90, 0.05, 0.05], + ... [0.05, 0.90, 0.05], + ... [0.05, 0.05, 0.90], + ... [0.85, 0.05, 0.10], + ... [0.10, 0.10, 0.80]]) + >>> target = B.tensor([0, 1, 1, 2, 2]) + >>> auroc(preds, target, num_classes=3) + tensor(0.7778) + """ + preds, target, mode = _auroc_update(preds, target) + return _auroc_compute(preds, target, mode, num_classes, pos_label, average, max_fpr, sample_weights) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/average_precision.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/average_precision.py new file mode 100644 index 00000000..bc611816 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/average_precision.py @@ -0,0 +1,236 @@ +# Copyright The PyTorch Lightning team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import warnings +from typing import List, Optional, Sequence, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.precision_recall_curve import ( + _precision_recall_curve_compute, + _precision_recall_curve_update, +) + + +def _average_precision_update( + preds: Tensor, + target: Tensor, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + average: Optional[str] = "macro", +) -> Tuple[Tensor, Tensor, int, Optional[int]]: + """Format the predictions and target based on the ``num_classes``, ``pos_label`` and ``average`` parameter + Args: + preds: predictions from model (logits or probabilities) + target: ground truth values + num_classes: integer with number of classes. + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + average: reduction method for multi-class or multi-label problems + """ + preds, target, num_classes, pos_label = _precision_recall_curve_update(preds, target, num_classes, pos_label) + if average == "micro": + if preds.ndim == target.ndim: + # Considering each element of the label indicator matrix as a label + preds = preds.flatten() + target = target.flatten() + num_classes = 1 + else: + raise ValueError("Cannot use `micro` average with multi-class input") + + return preds, target, num_classes, pos_label + + +def _average_precision_compute( + preds: Tensor, + target: Tensor, + num_classes: int, + pos_label: Optional[int] = None, + average: Optional[str] = "macro", + sample_weights: Optional[Sequence] = None, +) -> Union[List[Tensor], Tensor]: + """Computes the average precision score. + + Args: + preds: predictions from model (logits or probabilities) + target: ground truth values + num_classes: integer with number of classes. + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + average: reduction method for multi-class or multi-label problems + sample_weights: sample weights for each data point + + Example: + >>> # binary case + >>> preds = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 1]) + >>> pos_label = 1 + >>> preds, target, num_classes, pos_label = _average_precision_update(preds, target, pos_label=pos_label) + >>> _average_precision_compute(preds, target, num_classes, pos_label) + tensor(1.) + + >>> # multiclass case + >>> preds = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... 
[0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> num_classes = 5 + >>> preds, target, num_classes, pos_label = _average_precision_update(preds, target, num_classes) + >>> _average_precision_compute(preds, target, num_classes, average=None) + [tensor(1.), tensor(1.), tensor(0.2500), tensor(0.2500), tensor(nan)] + """ + + # todo: `sample_weights` is unused + precision, recall, _ = _precision_recall_curve_compute(preds, target, num_classes, pos_label) + if average == "weighted": + if preds.ndim == target.ndim and target.ndim > 1: + weights = target.sum(dim=0).float() + else: + weights = B.bincount(target, minlength=num_classes).float() + weights = weights / B.sum(weights) + else: + weights = None + return _average_precision_compute_with_precision_recall(precision, recall, num_classes, average, weights) + + +def _average_precision_compute_with_precision_recall( + precision: Tensor, + recall: Tensor, + num_classes: int, + average: Optional[str] = "macro", + weights: Optional[Tensor] = None, +) -> Union[List[Tensor], Tensor]: + """Computes the average precision score from precision and recall. + + Args: + precision: precision values + recall: recall values + num_classes: integer with number of classes. Not nessesary to provide + for binary problems. + average: reduction method for multi-class or multi-label problems + weights: weights to use when average='weighted' + + Example: + >>> # binary case + >>> preds = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 1]) + >>> pos_label = 1 + >>> preds, target, num_classes, pos_label = _average_precision_update(preds, target, pos_label=pos_label) + >>> precision, recall, _ = _precision_recall_curve_compute(preds, target, num_classes, pos_label) + >>> _average_precision_compute_with_precision_recall(precision, recall, num_classes, average=None) + tensor(1.) + + >>> # multiclass case + >>> preds = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> num_classes = 5 + >>> preds, target, num_classes, pos_label = _average_precision_update(preds, target, num_classes) + >>> precision, recall, _ = _precision_recall_curve_compute(preds, target, num_classes) + >>> _average_precision_compute_with_precision_recall(precision, recall, num_classes, average=None) + [tensor(1.), tensor(1.), tensor(0.2500), tensor(0.2500), tensor(nan)] + """ + + # Return the step function integral + # The following works because the last entry of precision is + # guaranteed to be 1, as returned by precision_recall_curve + if num_classes == 1: + return -B.sum((recall[1:] - recall[:-1]) * precision[:-1]) + + res = [] + for p, r in zip(precision, recall): + res.append(-B.sum((r[1:] - r[:-1]) * p[:-1])) + + # Reduce + if average in ("macro", "weighted"): + res = B.stack(res) + if B.isnan(res).any(): + warnings.warn( + "Average precision score for one or more classes was `nan`. 
Ignoring these classes in average", + UserWarning, + ) + if average == "macro": + return res[~B.isnan(res)].mean() + weights = B.ones_like(res) if weights is None else weights + return (res * weights)[~B.isnan(res)].sum() + if average is None: + return res + allowed_average = ("micro", "macro", "weighted", None) + raise ValueError(f"Expected argument `average` to be one of {allowed_average}" f" but got {average}") + + +def average_precision( + preds: Tensor, + target: Tensor, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + average: Optional[str] = "macro", + sample_weights: Optional[Sequence] = None, +) -> Union[List[Tensor], Tensor]: + """Computes the average precision score. + + Args: + preds: predictions from model (logits or probabilities) + target: ground truth values + num_classes: integer with number of classes. Not nessesary to provide + for binary problems. + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + average: + defines the reduction that is applied in the case of multiclass and multilabel input. + Should be one of the following: + + - ``'macro'`` [default]: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'micro'``: Calculate the metric globally, across all samples and classes. Cannot be + used with multiclass input. + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support. + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + + sample_weights: sample weights for each data point + + Returns: + tensor with average precision. If multiclass will return list + of such tensors, one for each class + + Example (binary case): + >>> from paddlemetrics.functional import average_precision + >>> pred = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 1]) + >>> average_precision(pred, target, pos_label=1) + tensor(1.) + + Example (multiclass case): + >>> pred = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> average_precision(pred, target, num_classes=5, average=None) + [tensor(1.), tensor(1.), tensor(0.2500), tensor(0.2500), tensor(nan)] + """ + # fixme: `sample_weights` is unused + preds, target, num_classes, pos_label = _average_precision_update(preds, target, num_classes, pos_label, average) + return _average_precision_compute(preds, target, num_classes, pos_label, average, sample_weights) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/calibration_error.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/calibration_error.py new file mode 100644 index 00000000..13203641 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/calibration_error.py @@ -0,0 +1,156 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import FloatTensor, Tensor + +from paddlemetrics.utilities.checks import _input_format_classification +from paddlemetrics.utilities.enums import DataType + + +def _ce_compute( + confidences: FloatTensor, + accuracies: FloatTensor, + bin_boundaries: FloatTensor, + norm: str = "l1", + debias: bool = False, +) -> Tensor: + """Computes the calibration error given the provided bin boundaries and norm. + + Args: + confidences (FloatTensor): The confidence (i.e. predicted prob) of the top1 prediction. + accuracies (FloatTensor): 1.0 if the top-1 prediction was correct, 0.0 otherwise. + bin_boundaries (FloatTensor): Bin boundaries separating the linspace from 0 to 1. + norm (str, optional): Norm function to use when computing calibration error. Defaults to "l1". + debias (bool, optional): Apply debiasing to L2 norm computation as in + `Verified Uncertainty Calibration`_. Defaults to False. + + Raises: + ValueError: If an unsupported norm function is provided. + + Returns: + Tensor: Calibration error scalar. + """ + if norm not in {"l1", "l2", "max"}: + raise ValueError(f"Norm {norm} is not supported. Please select from l1, l2, or max. ") + + conf_bin = B.zeros_like(bin_boundaries) + acc_bin = B.zeros_like(bin_boundaries) + prop_bin = B.zeros_like(bin_boundaries) + for i, (bin_lower, bin_upper) in enumerate(zip(bin_boundaries[:-1], bin_boundaries[1:])): + # Calculated confidence and accuracy in each bin + in_bin = confidences.gt(bin_lower.item()) * confidences.le(bin_upper.item()) + prop_in_bin = in_bin.float().mean() + if prop_in_bin.item() > 0: + acc_bin[i] = accuracies[in_bin].float().mean() + conf_bin[i] = confidences[in_bin].mean() + prop_bin[i] = prop_in_bin + + if norm == "l1": + ce = B.sum(B.abs(acc_bin - conf_bin) * prop_bin) + elif norm == "max": + ce = B.max(B.abs(acc_bin - conf_bin)) + elif norm == "l2": + ce = B.sum(B.pow(acc_bin - conf_bin, 2) * prop_bin) + # NOTE: debiasing is disabled in the wrapper functions. This implementation differs from that in sklearn. + if debias: + # the order here (acc_bin - 1 ) vs (1 - acc_bin) is flipped from + # the equation in Verified Uncertainty Prediction (Kumar et al 2019)/ + debias_bins = (acc_bin * (acc_bin - 1) * prop_bin) / (prop_bin * accuracies.size()[0] - 1) + ce += B.sum(B.nan_to_num(debias_bins)) # replace nans with zeros if nothing appeared in a bin + ce = B.sqrt(ce) if ce > 0 else B.tensor(0) + return ce + + +def _ce_update(preds: Tensor, target: Tensor) -> Tuple[FloatTensor, FloatTensor]: + """Given a predictions and targets tensor, computes the confidences of the top-1 prediction and records their + correctness. + + Args: + preds (Tensor): Input softmaxed predictions. + target (Tensor): Labels. + + Raises: + ValueError: If the dataset shape is not binary, multiclass, or multidimensional-multiclass. 
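For the plain multi-class branch described here, a hedged NumPy sketch of the confidence/correctness extraction (illustration only):

```python
# NumPy sketch of the multi-class branch of _ce_update: keep the top-1
# probability per sample and record whether the argmax matches the target.
import numpy as np

preds = np.array([[0.7, 0.2, 0.1],
                  [0.3, 0.5, 0.2]])
target = np.array([0, 2])

confidences = preds.max(axis=1)                              # [0.7, 0.5]
accuracies = (preds.argmax(axis=1) == target).astype(float)  # [1.0, 0.0]
```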
+ + Returns: + Tuple[FloatTensor, FloatTensor]: [description] + """ + _, _, mode = _input_format_classification(preds, target) + + if mode == DataType.BINARY: + confidences, accuracies = preds, target + elif mode == DataType.MULTICLASS: + confidences, predictions = preds.max(dim=1) + accuracies = predictions.eq(target) + elif mode == DataType.MULTIDIM_MULTICLASS: + # reshape tensors + # for preds, move the class dimension to the final axis and flatten the rest + confidences, predictions = B.transpose(preds, 1, -1).flatten(0, -2).max(dim=1) + # for targets, just flatten the target + accuracies = predictions.eq(target.flatten()) + else: + raise ValueError( + f"Calibration error is not well-defined for data with size {preds.size()} and targets {target.size()}." + ) + # must be cast to float for ddp allgather to work + return confidences.float(), accuracies.float() + + +def calibration_error(preds: Tensor, target: Tensor, n_bins: int = 15, norm: str = "l1") -> Tensor: + r""" + `Computes the Top-label Calibration Error`_ + + Three different norms are implemented, each corresponding to variations on the calibration error metric. + + L1 norm (Expected Calibration Error) + + .. math:: + \text{ECE} = \frac{1}{N}\sum_i^N \|(p_i - c_i)\| + + Infinity norm (Maximum Calibration Error) + + .. math:: + \text{RMSCE} = \max_{i} (p_i - c_i) + + L2 norm (Root Mean Square Calibration Error) + + .. math:: + \text{MCE} = \frac{1}{N}\sum_i^N (p_i - c_i)^2 + + Where :math:`p_i` is the top-1 prediction accuracy in + bin i and :math:`c_i` is the average confidence of predictions in bin i. + + .. note: + L2-norm debiasing is not yet supported. + + Args: + preds (Tensor): Model output probabilities. + target (Tensor): Ground-truth target class labels. + n_bins (int, optional): Number of bins to use when computing t. Defaults to 15. + norm (str, optional): Norm used to compare empirical and expected probability bins. + Defaults to "l1", or Expected Calibration Error. + """ + if norm not in ("l1", "l2", "max"): + raise ValueError(f"Norm {norm} is not supported. Please select from l1, l2, or max. ") + + if not isinstance(n_bins, int) or n_bins <= 0: + raise ValueError(f"Expected argument `n_bins` to be a int larger than 0 but got {n_bins}") + + confidences, accuracies = _ce_update(preds, target) + + bin_boundaries = B.linspace(0, 1, n_bins + 1, dtype=B.float, device=preds.device) + + return _ce_compute(confidences, accuracies, bin_boundaries, norm=norm) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/cohen_kappa.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/cohen_kappa.py new file mode 100644 index 00000000..2face7a5 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/cohen_kappa.py @@ -0,0 +1,112 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
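Before the Cohen's kappa implementation below, a hedged NumPy sketch of the L1 (Expected Calibration Error) branch that ``_ce_compute`` above evaluates over equal-width bins; illustration only, not part of paddlemetrics:

```python
# NumPy sketch of binned ECE (the norm="l1" branch of _ce_compute above).
import numpy as np

def expected_calibration_error(confidences, accuracies, n_bins=15):
    bins = np.linspace(0.0, 1.0, n_bins + 1)
    ece = 0.0
    for lo, hi in zip(bins[:-1], bins[1:]):
        in_bin = (confidences > lo) & (confidences <= hi)
        prop = in_bin.mean()  # fraction of samples falling into this bin
        if prop > 0:
            gap = abs(accuracies[in_bin].mean() - confidences[in_bin].mean())
            ece += gap * prop
    return ece

conf = np.array([0.95, 0.85, 0.75, 0.65])
acc = np.array([1.0, 1.0, 0.0, 1.0])
print(expected_calibration_error(conf, acc, n_bins=10))  # ~0.325
```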
+from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.confusion_matrix import _confusion_matrix_compute, _confusion_matrix_update + +_cohen_kappa_update = _confusion_matrix_update + + +def _cohen_kappa_compute(confmat: Tensor, weights: Optional[str] = None) -> Tensor: + """Computes Cohen's kappa based on the weighting type. + + Args: + confmat: Confusion matrix without normalization + weights: Weighting type to calculate the score. Choose from + - ``None`` or ``'none'``: no weighting + - ``'linear'``: linear weighting + - ``'quadratic'``: quadratic weighting + + Example: + >>> target = B.tensor([1, 1, 0, 0]) + >>> preds = B.tensor([0, 1, 0, 0]) + >>> confmat = _cohen_kappa_update(preds, target, num_classes=2) + >>> _cohen_kappa_compute(confmat) + tensor(0.5000) + """ + + confmat = _confusion_matrix_compute(confmat) + confmat = confmat.float() if not confmat.is_floating_point() else confmat + n_classes = confmat.shape[0] + sum0 = confmat.sum(dim=0, keepdim=True) + sum1 = confmat.sum(dim=1, keepdim=True) + expected = sum1 @ sum0 / sum0.sum() # outer product + + if weights is None: + w_mat = B.ones_like(confmat).flatten() + w_mat[:: n_classes + 1] = 0 + w_mat = w_mat.reshape(n_classes, n_classes) + elif weights in ("linear", "quadratic"): + w_mat = B.zeros_like(confmat) + w_mat += B.arange(n_classes, dtype=w_mat.dtype, device=w_mat.device) + if weights == "linear": + w_mat = B.abs(w_mat - w_mat.T) + else: + w_mat = B.pow(w_mat - w_mat.T, 2.0) + else: + raise ValueError( + f"Received {weights} for argument ``weights`` but should be either" " None, 'linear' or 'quadratic'" + ) + + k = B.sum(w_mat * confmat) / B.sum(w_mat * expected) + return 1 - k + + +def cohen_kappa( + preds: Tensor, + target: Tensor, + num_classes: int, + weights: Optional[str] = None, + threshold: float = 0.5, +) -> Tensor: + r""" + Calculates `Cohen's kappa score`_ that measures inter-annotator agreement. + It is defined as + + .. math:: + \kappa = (p_o - p_e) / (1 - p_e) + + where :math:`p_o` is the empirical probability of agreement and :math:`p_e` isg + the expected agreement when both annotators assign labels randomly. Note that + :math:`p_e` is estimated using a per-annotator empirical prior over the + class labels. + + Args: + preds: (float or long tensor), Either a ``(N, ...)`` tensor with labels or + ``(N, C, ...)`` where C is the number of classes, tensor with labels/probabilities + + target: ``target`` (long tensor), tensor with shape ``(N, ...)`` with ground true labels + + num_classes: Number of classes in the dataset. + + weights: Weighting type to calculate the score. Choose from + - ``None`` or ``'none'``: no weighting + - ``'linear'``: linear weighting + - ``'quadratic'``: quadratic weighting + + threshold: + Threshold value for binary or multi-label probabilities. 
default: 0.5 + + Example: + >>> from paddlemetrics.functional import cohen_kappa + >>> target = B.tensor([1, 1, 0, 0]) + >>> preds = B.tensor([0, 1, 0, 0]) + >>> cohen_kappa(preds, target, num_classes=2) + tensor(0.5000) + """ + confmat = _cohen_kappa_update(preds, target, num_classes, threshold) + return _cohen_kappa_compute(confmat, weights) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/confusion_matrix.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/confusion_matrix.py new file mode 100644 index 00000000..b4f3c12d --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/confusion_matrix.py @@ -0,0 +1,184 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities import rank_zero_warn +from paddlemetrics.utilities.checks import _input_format_classification +from paddlemetrics.utilities.enums import DataType + + +def _confusion_matrix_update( + preds: Tensor, target: Tensor, num_classes: int, threshold: float = 0.5, multilabel: bool = False +) -> Tensor: + """Updates and returns confusion matrix (without any normalization) based on the mode of the input. + + Args: + preds: Predicted tensor + target: Ground truth tensor + threshold: Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the + case of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + multilabel: determines if data is multilabel or not. + """ + + preds, target, mode = _input_format_classification(preds, target, threshold) + if mode not in (DataType.BINARY, DataType.MULTILABEL): + preds = preds.argmax(dim=1) + target = target.argmax(dim=1) + if multilabel: + unique_mapping = ((2 * target + preds) + 4 * B.arange(num_classes, device=preds.device)).flatten() + minlength = 4 * num_classes + else: + unique_mapping = (target.view(-1) * num_classes + preds.view(-1)).to(B.long) + minlength = num_classes ** 2 + + bins = B.bincount(unique_mapping, minlength=minlength) + if multilabel: + confmat = bins.reshape(num_classes, 2, 2) + else: + confmat = bins.reshape(num_classes, num_classes) + return confmat + + +def _confusion_matrix_compute(confmat: Tensor, normalize: Optional[str] = None) -> Tensor: + """Computes confusion matrix based on the normalization mode. + + Args: + confmat: Confusion matrix without normalization + normalize: Normalization mode for confusion matrix. 
Choose from + - ``None`` or ``'none'``: no normalization (default) + - ``'true'``: normalization over the targets (most commonly used) + - ``'pred'``: normalization over the predictions + - ``'all'``: normalization over the whole matrix + + Example: + >>> # binary case + >>> target = B.tensor([1, 1, 0, 0]) + >>> preds = B.tensor([0, 1, 0, 0]) + >>> confmat = _confusion_matrix_update(preds, target, num_classes=2) + >>> _confusion_matrix_compute(confmat) + tensor([[2, 0], + [1, 1]]) + + >>> # multiclass case + >>> target = B.tensor([2, 1, 0, 0]) + >>> preds = B.tensor([2, 1, 0, 1]) + >>> confmat = _confusion_matrix_update(preds, target, num_classes=3) + >>> _confusion_matrix_compute(confmat) + tensor([[1, 1, 0], + [0, 1, 0], + [0, 0, 1]]) + + >>> # multilabel case + >>> target = B.tensor([[0, 1, 0], [1, 0, 1]]) + >>> preds = B.tensor([[0, 0, 1], [1, 0, 1]]) + >>> confmat = _confusion_matrix_update(preds, target, num_classes=3, multilabel=True) + >>> _confusion_matrix_compute(confmat) # doctest: +NORMALIZE_WHITESPACE + tensor([[[1, 0], [0, 1]], + [[1, 0], [1, 0]], + [[0, 1], [0, 1]]]) + """ + + allowed_normalize = ("true", "pred", "all", "none", None) + if normalize not in allowed_normalize: + raise ValueError(f"Argument average needs to one of the following: {allowed_normalize}") + if normalize is not None and normalize != "none": + confmat = confmat.float() if not confmat.is_floating_point() else confmat + if normalize == "true": + confmat = confmat / confmat.sum(axis=1, keepdim=True) + elif normalize == "pred": + confmat = confmat / confmat.sum(axis=0, keepdim=True) + elif normalize == "all": + confmat = confmat / confmat.sum() + + nan_elements = confmat[B.isnan(confmat)].nelement() + if nan_elements != 0: + confmat[B.isnan(confmat)] = 0 + rank_zero_warn(f"{nan_elements} nan values found in confusion matrix have been replaced with zeros.") + return confmat + + +def confusion_matrix( + preds: Tensor, + target: Tensor, + num_classes: int, + normalize: Optional[str] = None, + threshold: float = 0.5, + multilabel: bool = False, +) -> Tensor: + r""" + Computes the `confusion matrix`_. Works with binary, + multiclass, and multilabel data. Accepts probabilities or logits from a model output or integer class + values in prediction. Works with multi-dimensional preds and target, but it should be noted that + additional dimensions will be flattened. + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument + to convert into integer labels. This is the case for binary and multi-label probabilities or logits. + + If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. + + If working with multilabel data, setting the `is_multilabel` argument to `True` will make sure that a + `confusion matrix gets calculated per label`_. + + Args: + preds: (float or long tensor), Either a ``(N, ...)`` tensor with labels or + ``(N, C, ...)`` where C is the number of classes, tensor with labels/logits/probabilities + target: ``target`` (long tensor), tensor with shape ``(N, ...)`` with ground true labels + num_classes: Number of classes in the dataset. + normalize: Normalization mode for confusion matrix. 
Choose from + + - ``None`` or ``'none'``: no normalization (default) + - ``'true'``: normalization over the targets (most commonly used) + - ``'pred'``: normalization over the predictions + - ``'all'``: normalization over the whole matrix + + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + + multilabel: + determines if data is multilabel or not. + + Example (binary data): + >>> from paddlemetrics import ConfusionMatrix + >>> target = B.tensor([1, 1, 0, 0]) + >>> preds = B.tensor([0, 1, 0, 0]) + >>> confmat = ConfusionMatrix(num_classes=2) + >>> confmat(preds, target) + tensor([[2., 0.], + [1., 1.]]) + + Example (multiclass data): + >>> target = B.tensor([2, 1, 0, 0]) + >>> preds = B.tensor([2, 1, 0, 1]) + >>> confmat = ConfusionMatrix(num_classes=3) + >>> confmat(preds, target) + tensor([[1., 1., 0.], + [0., 1., 0.], + [0., 0., 1.]]) + + Example (multilabel data): + >>> target = B.tensor([[0, 1, 0], [1, 0, 1]]) + >>> preds = B.tensor([[0, 0, 1], [1, 0, 1]]) + >>> confmat = ConfusionMatrix(num_classes=3, multilabel=True) + >>> confmat(preds, target) # doctest: +NORMALIZE_WHITESPACE + tensor([[[1., 0.], [0., 1.]], + [[1., 0.], [1., 0.]], + [[0., 1.], [0., 1.]]]) + + """ + confmat = _confusion_matrix_update(preds, target, num_classes, threshold, multilabel) + return _confusion_matrix_compute(confmat, normalize) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/dice.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/dice.py new file mode 100644 index 00000000..5f90fe02 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/dice.py @@ -0,0 +1,112 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.data import to_categorical +from paddlemetrics.utilities.distributed import reduce + + +def _stat_scores( + preds: Tensor, + target: Tensor, + class_index: int, + argmax_dim: int = 1, +) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: + """Calculates the number of true positive, false positive, true negative and false negative for a specific + class. 
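Stepping back to the confusion-matrix module above for a moment: a hedged NumPy sketch of the bincount encoding that ``_confusion_matrix_update`` uses for the multiclass case (illustration only):

```python
# NumPy sketch of the bincount trick behind _confusion_matrix_update above.
import numpy as np

num_classes = 3
target = np.array([2, 1, 0, 0])
preds = np.array([2, 1, 0, 1])

# each (target, pred) pair maps to a unique bin in [0, num_classes**2)
unique_mapping = target * num_classes + preds
confmat = np.bincount(unique_mapping, minlength=num_classes ** 2)
confmat = confmat.reshape(num_classes, num_classes)
print(confmat)  # [[1 1 0], [0 1 0], [0 0 1]], matching the docstring above
```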
+ + Args: + preds: prediction tensor + target: target tensor + class_index: class to calculate over + argmax_dim: if pred is a tensor of probabilities, this indicates the + axis the argmax transformation will be applied over + + Return: + True Positive, False Positive, True Negative, False Negative, Support + + Example: + >>> x = B.tensor([1, 2, 3]) + >>> y = B.tensor([0, 2, 3]) + >>> tp, fp, tn, fn, sup = _stat_scores(x, y, class_index=1) + >>> tp, fp, tn, fn, sup + (tensor(0), tensor(1), tensor(2), tensor(0), tensor(0)) + """ + if preds.ndim == target.ndim + 1: + preds = to_categorical(preds, argmax_dim=argmax_dim) + + tp = ((preds == class_index) * (target == class_index)).to(B.long).sum() + fp = ((preds == class_index) * (target != class_index)).to(B.long).sum() + tn = ((preds != class_index) * (target != class_index)).to(B.long).sum() + fn = ((preds != class_index) * (target == class_index)).to(B.long).sum() + sup = (target == class_index).to(B.long).sum() + + return tp, fp, tn, fn, sup + + +def dice_score( + preds: Tensor, + target: Tensor, + bg: bool = False, + nan_score: float = 0.0, + no_fg_score: float = 0.0, + reduction: str = "elementwise_mean", +) -> Tensor: + """Compute dice score from prediction scores. + + Args: + preds: estimated probabilities + target: ground-truth labels + bg: whether to also compute dice for the background + nan_score: score to return, if a NaN occurs during computation + no_fg_score: score to return, if no foreground pixel was found in target + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + + Return: + Tensor containing dice score + + Example: + >>> from paddlemetrics.functional import dice_score + >>> pred = B.tensor([[0.85, 0.05, 0.05, 0.05], + ... [0.05, 0.85, 0.05, 0.05], + ... [0.05, 0.05, 0.85, 0.05], + ... [0.05, 0.05, 0.05, 0.85]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> dice_score(pred, target) + tensor(0.3333) + """ + num_classes = preds.shape[1] + bg_inv = 1 - int(bg) + scores = B.zeros(num_classes - bg_inv, device=preds.device, dtype=B.float32) + for i in range(bg_inv, num_classes): + if not (target == i).any(): + # no foreground class + scores[i - bg_inv] += no_fg_score + continue + + # TODO: rewrite to use general `stat_scores` + tp, fp, _, fn, _ = _stat_scores(preds=preds, target=target, class_index=i) + denom = (2 * tp + fp + fn).to(B.float) + # nan result + score_cls = (2 * tp).to(B.float) / denom if B.is_nonzero(denom) else nan_score + scores[i - bg_inv] += score_cls.item() + + return reduce(scores, reduction=reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/f_beta.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/f_beta.py new file mode 100644 index 00000000..7b9b626c --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/f_beta.py @@ -0,0 +1,351 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.stat_scores import _reduce_stat_scores, _stat_scores_update +from paddlemetrics.utilities.enums import AverageMethod as AvgMethod +from paddlemetrics.utilities.enums import MDMCAverageMethod + + +def _safe_divide(num: Tensor, denom: Tensor) -> Tensor: + """prevent zero division.""" + denom[denom == 0.0] = 1 + return num / denom + + +def _fbeta_compute( + tp: Tensor, + fp: Tensor, + tn: Tensor, + fn: Tensor, + beta: float, + ignore_index: Optional[int], + average: str, + mdmc_average: Optional[str], +) -> Tensor: + """Computes f_beta metric from stat scores: true positives, false positives, true negatives, false negatives. + + Args: + tp: True positives + fp: False positives + tn: True negatives + fn: False negatives + beta: The parameter `beta` (which determines the weight of recall in the combined score) + ignore_index: Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method + average: Defines the reduction that is applied + mdmc_average: Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter) + + Example: + >>> from paddlemetrics.functional.classification.stat_scores import _stat_scores_update + >>> target = B.tensor([0, 1, 2, 0, 1, 2]) + >>> preds = B.tensor([0, 2, 1, 0, 0, 1]) + >>> tp, fp, tn, fn = _stat_scores_update( + ... preds, + ... target, + ... reduce='micro', + ... num_classes=3, + ... ) + >>> _fbeta_compute(tp, fp, tn, fn, beta=0.5, ignore_index=None, average='micro', mdmc_average=None) + tensor(0.3333) + """ + if average == AvgMethod.MICRO and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + mask = tp >= 0 + precision = _safe_divide(tp[mask].sum().float(), (tp[mask] + fp[mask]).sum()) + recall = _safe_divide(tp[mask].sum().float(), (tp[mask] + fn[mask]).sum()) + else: + precision = _safe_divide(tp.float(), tp + fp) + recall = _safe_divide(tp.float(), tp + fn) + + num = (1 + beta ** 2) * precision * recall + denom = beta ** 2 * precision + recall + denom[denom == 0.0] = 1.0 # avoid division by 0 + + # if classes matter and a given class is not present in both the preds and the target, + # computing the score for this class is meaningless, thus they should be ignored + if average == AvgMethod.NONE and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + # a class is not present if there exists no TPs, no FPs, and no FNs + meaningless_indeces = B.nonzero((tp | fn | fp) == 0).cpu() + if ignore_index is None: + ignore_index = meaningless_indeces + else: + ignore_index = B.unique(B.cat((meaningless_indeces, B.tensor([[ignore_index]])))) + + if ignore_index is not None: + if average not in (AvgMethod.MICRO, AvgMethod.SAMPLES) and mdmc_average == MDMCAverageMethod.SAMPLEWISE: + num[..., ignore_index] = -1 + denom[..., ignore_index] = -1 + elif average not in (AvgMethod.MICRO, AvgMethod.SAMPLES): + num[ignore_index, ...] = -1 + denom[ignore_index, ...] 
= -1 + + if average == AvgMethod.MACRO and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + cond = (tp + fp + fn == 0) | (tp + fp + fn == -3) + num = num[~cond] + denom = denom[~cond] + + return _reduce_stat_scores( + numerator=num, + denominator=denom, + weights=None if average != AvgMethod.WEIGHTED else tp + fn, + average=average, + mdmc_average=mdmc_average, + ) + + +def fbeta( + preds: Tensor, + target: Tensor, + beta: float = 1.0, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + num_classes: Optional[int] = None, + threshold: float = 0.5, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, +) -> Tensor: + r""" + Computes f_beta metric. + + .. math:: + F_{\beta} = (1 + \beta^2) * \frac{\text{precision} * \text{recall}} + {(\beta^2 * \text{precision}) + \text{recall}} + + Works with binary, multiclass, and multilabel data. + Accepts probabilities or logits from a model output or integer class values in prediction. + Works with multi-dimensional preds and target. + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument + to convert into integer labels. This is the case for binary and multi-label logits or probabilities. + + If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. + + The reduction method (how the precision scores are aggregated) is controlled by the + ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the + multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + preds: Predictions from model (probabilities, logits or labels) + target: Ground truth values + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + .. note:: If ``'none'`` and a given class doesn't occur in the `preds` or `target`, + the value for the class will be ``nan``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. 
+ - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + Return: + The shape of the returned tensor depends on the ``average`` parameter + + - If ``average in ['micro', 'macro', 'weighted', 'samples']``, a one-element tensor will be returned + - If ``average in ['none', None]``, the shape will be ``(C,)``, where ``C`` stands for the number + of classes + + Example: + >>> from paddlemetrics.functional import fbeta + >>> target = B.tensor([0, 1, 2, 0, 1, 2]) + >>> preds = B.tensor([0, 2, 1, 0, 0, 1]) + >>> fbeta(preds, target, num_classes=3, beta=0.5) + tensor(0.3333) + + """ + allowed_average = list(AvgMethod) + if average not in allowed_average: + raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") + + if mdmc_average is not None and MDMCAverageMethod.from_str(mdmc_average) is None: + raise ValueError(f"The `mdmc_average` has to be one of {list(MDMCAverageMethod)}, got {mdmc_average}.") + + if average in [AvgMethod.MACRO, AvgMethod.WEIGHTED, AvgMethod.NONE] and (not num_classes or num_classes < 1): + raise ValueError(f"When you set `average` as {average}, you have to provide the number of classes.") + + if num_classes and ignore_index is not None and (not 0 <= ignore_index < num_classes or num_classes == 1): + raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {num_classes} classes") + + reduce = AvgMethod.MACRO if average in [AvgMethod.WEIGHTED, AvgMethod.NONE] else average + tp, fp, tn, fn = _stat_scores_update( + preds, + target, + reduce=reduce, + mdmc_reduce=mdmc_average, + threshold=threshold, + num_classes=num_classes, + top_k=top_k, + multiclass=multiclass, + ignore_index=ignore_index, + ) + + return _fbeta_compute(tp, fp, tn, fn, beta, ignore_index, average, mdmc_average) + + +def f1( + preds: Tensor, + target: Tensor, + beta: float = 1.0, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + num_classes: Optional[int] = None, + threshold: float = 0.5, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, +) -> Tensor: + """Computes F1 metric. 
F1 metrics correspond to a equally weighted average of the precision and recall scores. + + Works with binary, multiclass, and multilabel data. + Accepts probabilities or logits from a model output or integer class values in prediction. + Works with multi-dimensional preds and target. + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument + to convert into integer labels. This is the case for binary and multi-label probabilities or logits. + + If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. + + The reduction method (how the precision scores are aggregated) is controlled by the + ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the + multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + preds: Predictions from model (probabilities, logits or labels) + target: Ground truth values + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + .. note:: If ``'none'`` and a given class doesn't occur in the `preds` or `target`, + the value for the class will be ``nan``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. 
+ top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + Return: + The shape of the returned tensor depends on the ``average`` parameter + + - If ``average in ['micro', 'macro', 'weighted', 'samples']``, a one-element tensor will be returned + - If ``average in ['none', None]``, the shape will be ``(C,)``, where ``C`` stands for the number + of classes + + Example: + >>> from paddlemetrics.functional import f1 + >>> target = B.tensor([0, 1, 2, 0, 1, 2]) + >>> preds = B.tensor([0, 2, 1, 0, 0, 1]) + >>> f1(preds, target, num_classes=3) + tensor(0.3333) + """ + return fbeta(preds, target, 1.0, average, mdmc_average, ignore_index, num_classes, threshold, top_k, multiclass) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/hamming_distance.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/hamming_distance.py new file mode 100644 index 00000000..e3f95bad --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/hamming_distance.py @@ -0,0 +1,97 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _input_format_classification + + +def _hamming_distance_update( + preds: Tensor, + target: Tensor, + threshold: float = 0.5, +) -> Tuple[Tensor, int]: + """Returns the number of positions where prediction equals target, and number of predictions. + + Args: + preds: Predicted tensor + target: Ground truth tensor + threshold: Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + """ + + preds, target, _ = _input_format_classification(preds, target, threshold=threshold) + + correct = (preds == target).sum() + total = preds.numel() + + return correct, total + + +def _hamming_distance_compute(correct: Tensor, total: Union[int, Tensor]) -> Tensor: + """Computes the Hamming distance. 
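+
+    The returned value is ``1 - correct / total``, i.e. the fraction of positions at which the
+    prediction and the target disagree (this note summarises the return statement below).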
+ + Args: + correct: Number of positions where prediction equals target + total: Total number of predictions + + Example: + >>> target = B.tensor([[0, 1], [1, 1]]) + >>> preds = B.tensor([[0, 1], [0, 1]]) + >>> correct, total = _hamming_distance_update(preds, target) + >>> _hamming_distance_compute(correct, total) + tensor(0.2500) + """ + + return 1 - correct.float() / total + + +def hamming_distance(preds: Tensor, target: Tensor, threshold: float = 0.5) -> Tensor: + r""" + Computes the average `Hamming distance`_ (also + known as Hamming loss) between targets and predictions: + + .. math:: + \text{Hamming distance} = \frac{1}{N \cdot L} \sum_i^N \sum_l^L 1(y_{il} \neq \hat{y}_{il}) + + Where :math:`y` is a tensor of target values, :math:`\hat{y}` is a tensor of predictions, + and :math:`\bullet_{il}` refers to the :math:`l`-th label of the :math:`i`-th sample of that + tensor. + + This is the same as ``1-accuracy`` for binary data, while for all other types of inputs it + treats each possible label separately - meaning that, for example, multi-class data is + treated as if it were multi-label. + + Accepts all input types listed in :ref:`references/modules:input types`. + + Args: + preds: Predictions from model (probabilities, logits or labels) + target: Ground truth + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + + Example: + >>> from paddlemetrics.functional import hamming_distance + >>> target = B.tensor([[0, 1], [1, 1]]) + >>> preds = B.tensor([[0, 1], [0, 1]]) + >>> hamming_distance(preds, target) + tensor(0.2500) + + """ + + correct, total = _hamming_distance_update(preds, target, threshold) + return _hamming_distance_compute(correct, total) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/hinge.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/hinge.py new file mode 100644 index 00000000..59d8be1a --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/hinge.py @@ -0,0 +1,231 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.checks import _input_squeeze +from paddlemetrics.utilities.data import to_onehot +from paddlemetrics.utilities.enums import DataType, EnumStr + + +class MulticlassMode(EnumStr): + """Enum to represent possible multiclass modes of hinge. + + >>> "Crammer-Singer" in list(MulticlassMode) + True + """ + + CRAMMER_SINGER = "crammer-singer" + ONE_VS_ALL = "one-vs-all" + + +def _check_shape_and_type_consistency_hinge( + preds: Tensor, + target: Tensor, +) -> DataType: + """Checks shape and type of `preds` and `target` and returns mode of the input tensors. 
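+
+    One dimensional ``preds`` are interpreted as binary scores, while two dimensional ``preds`` of
+    shape ``(N, C)`` are interpreted as multiclass scores (see the branching below).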
+ + Args: + preds: Predicted tensor + target: Ground truth tensor + + Raises: + `ValueError`: if `target` is not one dimensional + `ValueError`: if `preds` and `target` do not have the same shape in the first dimension + `ValueError`: if `pred` is neither one nor two dimensional + """ + + if target.ndim > 1: + raise ValueError( + f"The `target` should be one dimensional, got `target` with shape={target.shape}.", + ) + + if preds.ndim == 1: + if preds.shape != target.shape: + raise ValueError( + "The `preds` and `target` should have the same shape,", + f" got `preds` with shape={preds.shape} and `target` with shape={target.shape}.", + ) + mode = DataType.BINARY + elif preds.ndim == 2: + if preds.shape[0] != target.shape[0]: + raise ValueError( + "The `preds` and `target` should have the same shape in the first dimension,", + f" got `preds` with shape={preds.shape} and `target` with shape={target.shape}.", + ) + mode = DataType.MULTICLASS + else: + raise ValueError(f"The `preds` should be one or two dimensional, got `preds` with shape={preds.shape}.") + return mode + + +def _hinge_update( + preds: Tensor, + target: Tensor, + squared: bool = False, + multiclass_mode: Optional[Union[str, MulticlassMode]] = None, +) -> Tuple[Tensor, Tensor]: + """Updates and returns sum over Hinge loss scores for each observation and the total number of observations. + + Args: + preds: Predicted tensor + target: Ground truth tensor + squared: If True, this will compute the squared hinge loss. Otherwise, computes the regular hinge loss. + multiclass_mode: + Which approach to use for multi-class inputs (has no effect in the binary case). ``None`` (default), + ``MulticlassMode.CRAMMER_SINGER`` or ``"crammer-singer"``, uses the Crammer Singer multi-class hinge loss. + ``MulticlassMode.ONE_VS_ALL`` or ``"one-vs-all"`` computes the hinge loss in a one-vs-all fashion. + """ + preds, target = _input_squeeze(preds, target) + + mode = _check_shape_and_type_consistency_hinge(preds, target) + + if mode == DataType.MULTICLASS: + target = to_onehot(target, max(2, preds.shape[1])).bool() + + if mode == DataType.MULTICLASS and (multiclass_mode is None or multiclass_mode == MulticlassMode.CRAMMER_SINGER): + margin = preds[target] + margin -= B.max(preds[~target].view(preds.shape[0], -1), dim=1)[0] + elif mode == DataType.BINARY or multiclass_mode == MulticlassMode.ONE_VS_ALL: + target = target.bool() + margin = B.zeros_like(preds) + margin[target] = preds[target] + margin[~target] = -preds[~target] + else: + raise ValueError( + "The `multiclass_mode` should be either None / 'crammer-singer' / MulticlassMode.CRAMMER_SINGER" + "(default) or 'one-vs-all' / MulticlassMode.ONE_VS_ALL," + f" got {multiclass_mode}." + ) + + measures = 1 - margin + measures = B.clamp(measures, 0) + + if squared: + measures = measures.pow(2) + + total = tensor(target.shape[0], device=target.device) + return measures.sum(dim=0), total + + +def _hinge_compute(measure: Tensor, total: Tensor) -> Tensor: + """Computes mean Hinge loss. 
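+
+    The mean is taken over observations, i.e. the result is ``measure / total``, where ``measure``
+    is the sum of per-observation hinge losses returned by ``_hinge_update``.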
+ + Args: + measure: Sum over hinge losses for each each observation + total: Number of observations + + Example: + >>> # binary case + >>> target = B.tensor([0, 1, 1]) + >>> preds = B.tensor([-2.2, 2.4, 0.1]) + >>> measure, total = _hinge_update(preds, target) + >>> _hinge_compute(measure, total) + tensor(0.3000) + + >>> # multiclass case + >>> target = B.tensor([0, 1, 2]) + >>> preds = B.tensor([[-1.0, 0.9, 0.2], [0.5, -1.1, 0.8], [2.2, -0.5, 0.3]]) + >>> measure, total = _hinge_update(preds, target) + >>> _hinge_compute(measure, total) + tensor(2.9000) + + >>> # multiclass one-vs-all mode case + >>> target = B.tensor([0, 1, 2]) + >>> preds = B.tensor([[-1.0, 0.9, 0.2], [0.5, -1.1, 0.8], [2.2, -0.5, 0.3]]) + >>> measure, total = _hinge_update(preds, target, multiclass_mode="one-vs-all") + >>> _hinge_compute(measure, total) + tensor([2.2333, 1.5000, 1.2333]) + """ + + return measure / total + + +def hinge( + preds: Tensor, + target: Tensor, + squared: bool = False, + multiclass_mode: Optional[Union[str, MulticlassMode]] = None, +) -> Tensor: + r""" + Computes the mean `Hinge loss`_ typically used for Support Vector Machines (SVMs). + + In the binary case it is defined as: + + .. math:: + \text{Hinge loss} = \max(0, 1 - y \times \hat{y}) + + Where :math:`y \in {-1, 1}` is the target, and :math:`\hat{y} \in \mathbb{R}` is the prediction. + + In the multi-class case, when ``multiclass_mode=None`` (default), ``multiclass_mode=MulticlassMode.CRAMMER_SINGER`` + or ``multiclass_mode="crammer-singer"``, this metric will compute the multi-class hinge loss defined by Crammer and + Singer as: + + .. math:: + \text{Hinge loss} = \max\left(0, 1 - \hat{y}_y + \max_{i \ne y} (\hat{y}_i)\right) + + Where :math:`y \in {0, ..., \mathrm{C}}` is the target class (where :math:`\mathrm{C}` is the number of classes), + and :math:`\hat{y} \in \mathbb{R}^\mathrm{C}` is the predicted output per class. + + In the multi-class case when ``multiclass_mode=MulticlassMode.ONE_VS_ALL`` or ``multiclass_mode='one-vs-all'``, this + metric will use a one-vs-all approach to compute the hinge loss, giving a vector of C outputs where each entry pits + that class against all remaining classes. + + This metric can optionally output the mean of the squared hinge loss by setting ``squared=True`` + + Only accepts inputs with preds shape of (N) (binary) or (N, C) (multi-class) and target shape of (N). + + Args: + preds: Predictions from model (as float outputs from decision function). + target: Ground truth labels. + squared: + If True, this will compute the squared hinge loss. Otherwise, computes the regular hinge loss (default). + multiclass_mode: + Which approach to use for multi-class inputs (has no effect in the binary case). ``None`` (default), + ``MulticlassMode.CRAMMER_SINGER`` or ``"crammer-singer"``, uses the Crammer Singer multi-class hinge loss. + ``MulticlassMode.ONE_VS_ALL`` or ``"one-vs-all"`` computes the hinge loss in a one-vs-all fashion. + + Raises: + ValueError: + If preds shape is not of size (N) or (N, C). + ValueError: + If target shape is not of size (N). + ValueError: + If ``multiclass_mode`` is not: None, ``MulticlassMode.CRAMMER_SINGER``, ``"crammer-singer"``, + ``MulticlassMode.ONE_VS_ALL`` or ``"one-vs-all"``. 
+ + Example (binary case): + >>> import torchapi as B + >>> from paddlemetrics.functional import hinge + >>> target = B.tensor([0, 1, 1]) + >>> preds = B.tensor([-2.2, 2.4, 0.1]) + >>> hinge(preds, target) + tensor(0.3000) + + Example (default / multiclass case): + >>> target = B.tensor([0, 1, 2]) + >>> preds = B.tensor([[-1.0, 0.9, 0.2], [0.5, -1.1, 0.8], [2.2, -0.5, 0.3]]) + >>> hinge(preds, target) + tensor(2.9000) + + Example (multiclass example, one vs all mode): + >>> target = B.tensor([0, 1, 2]) + >>> preds = B.tensor([[-1.0, 0.9, 0.2], [0.5, -1.1, 0.8], [2.2, -0.5, 0.3]]) + >>> hinge(preds, target, multiclass_mode="one-vs-all") + tensor([2.2333, 1.5000, 1.2333]) + """ + measure, total = _hinge_update(preds, target, squared=squared, multiclass_mode=multiclass_mode) + return _hinge_compute(measure, total) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/iou.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/iou.py new file mode 100644 index 00000000..b7cf6077 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/iou.py @@ -0,0 +1,133 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.confusion_matrix import _confusion_matrix_update +from paddlemetrics.utilities.data import get_num_classes +from paddlemetrics.utilities.distributed import reduce + + +def _iou_from_confmat( + confmat: Tensor, + num_classes: int, + ignore_index: Optional[int] = None, + absent_score: float = 0.0, + reduction: str = "elementwise_mean", +) -> Tensor: + """Computes the intersection over union from confusion matrix. + + Args: + confmat: Confusion matrix without normalization + num_classes: Number of classes for a given prediction and target tensor + ignore_index: optional int specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. + absent_score: score to use for an individual class, if no instances of the class index were present in `pred` + AND no instances of the class index were present in `target`. + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + """ + + # Remove the ignored class index from the scores. + if ignore_index is not None and 0 <= ignore_index < num_classes: + confmat[ignore_index] = 0.0 + + intersection = B.diag(confmat) + union = confmat.sum(0) + confmat.sum(1) - intersection + + # If this class is absent in both target AND pred (union == 0), then use the absent_score for this class. 
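+    # Illustration (hypothetical values): with 3 classes and class 1 absent from both `pred` and
+    # `target`, row 1 and column 1 of `confmat` are all zeros, so union[1] == 0 and the score for
+    # class 1 is set to `absent_score` below.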
+ scores = intersection.float() / union.float() + scores[union == 0] = absent_score + + if ignore_index is not None and 0 <= ignore_index < num_classes: + scores = B.cat( + [ + scores[:ignore_index], + scores[ignore_index + 1 :], + ] + ) + + return reduce(scores, reduction=reduction) + + +def iou( + preds: Tensor, + target: Tensor, + ignore_index: Optional[int] = None, + absent_score: float = 0.0, + threshold: float = 0.5, + num_classes: Optional[int] = None, + reduction: str = "elementwise_mean", +) -> Tensor: + r""" + Computes `Jaccard index`_ + + .. math:: J(A,B) = \frac{|A\cap B|}{|A\cup B|} + + Where: :math:`A` and :math:`B` are both tensors of the same size, + containing integer class values. They may be subject to conversion from + input data (see description below). + + Note that it is different from box IoU. + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument + to convert into integer labels. This is the case for binary and multi-label probabilities. + + If pred has an extra dimension as in the case of multi-class scores we + perform an argmax on ``dim=1``. + + Args: + preds: tensor containing predictions from model (probabilities, or labels) with shape ``[N, d1, d2, ...]`` + target: tensor containing ground truth labels with shape ``[N, d1, d2, ...]`` + ignore_index: optional int specifying a target class to ignore. If given, + this class index does not contribute to the returned score, regardless + of reduction method. Has no effect if given an int that is not in the + range [0, num_classes-1], where num_classes is either given or derived + from pred and target. By default, no index is ignored, and all classes are used. + absent_score: score to use for an individual class, if no instances of + the class index were present in `pred` AND no instances of the class + index were present in `target`. For example, if we have 3 classes, + [0, 0] for `pred`, and [0, 2] for `target`, then class 1 would be + assigned the `absent_score`. + threshold: + Threshold value for binary or multi-label probabilities. default: 0.5 + num_classes: + Optionally specify the number of classes + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + + Return: + IoU score: Tensor containing single value if reduction is + 'elementwise_mean', or number of classes if reduction is 'none' + + Example: + >>> from paddlemetrics.functional import iou + >>> target = B.randint(0, 2, (10, 25, 25)) + >>> pred = B.tensor(target) + >>> pred[2:5, 7:13, 9:15] = 1 - pred[2:5, 7:13, 9:15] + >>> iou(pred, target) + tensor(0.9660) + """ + + num_classes = get_num_classes(preds=preds, target=target, num_classes=num_classes) + confmat = _confusion_matrix_update(preds, target, num_classes, threshold) + return _iou_from_confmat(confmat, num_classes, ignore_index, absent_score, reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/kl_divergence.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/kl_divergence.py new file mode 100644 index 00000000..0d7685c1 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/kl_divergence.py @@ -0,0 +1,110 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional, Tuple
+
+import paddleext.torchapi as B
+from paddleext.torchapi import Tensor
+
+from paddlemetrics.utilities.checks import _check_same_shape
+from paddlemetrics.utilities.data import METRIC_EPS
+
+
+def _kld_update(p: Tensor, q: Tensor, log_prob: bool) -> Tuple[Tensor, int]:
+    """Updates and returns KL divergence scores for each observation and the total number of observations. Checks
+    same shape and 2D nature of the input tensors else raises ValueError.
+
+    Args:
+        p: data distribution with shape ``[N, d]``
+        q: prior or approximate distribution with shape ``[N, d]``
+        log_prob: bool indicating if input is log-probabilities or probabilities. If given as probabilities,
+            will normalize to make sure the distributions sum to 1
+    """
+    _check_same_shape(p, q)
+    if p.ndim != 2 or q.ndim != 2:
+        raise ValueError(f"Expected both p and q distribution to be 2D but got {p.ndim} and {q.ndim} respectively")
+
+    total = p.shape[0]
+    if log_prob:
+        measures = B.sum(p.exp() * (p - q), axis=-1)
+    else:
+        p = p / p.sum(axis=-1, keepdim=True)
+        q = q / q.sum(axis=-1, keepdim=True)
+        q = B.clamp(q, METRIC_EPS)
+        measures = B.sum(p * B.log(p / q), axis=-1)
+
+    return measures, total
+
+
+def _kld_compute(measures: Tensor, total: Tensor, reduction: Optional[str] = "mean") -> Tensor:
+    """Computes the KL divergence based on the type of reduction.
+
+    Args:
+        measures: Tensor of KL divergence scores for each observation
+        total: Number of observations
+        reduction:
+            Determines how to reduce over the ``N``/batch dimension:
+
+            - ``'mean'`` [default]: Averages score across samples
+            - ``'sum'``: Sum score across samples
+            - ``'none'`` or ``None``: Returns score per sample
+
+    Example:
+        >>> p = B.tensor([[0.36, 0.48, 0.16]])
+        >>> q = B.tensor([[1/3, 1/3, 1/3]])
+        >>> measures, total = _kld_update(p, q, log_prob=False)
+        >>> _kld_compute(measures, total)
+        tensor(0.0853)
+    """
+
+    if reduction == "sum":
+        return measures.sum()
+    if reduction == "mean":
+        return measures.sum() / total
+    if reduction is None or reduction == "none":
+        return measures
+    return measures / total
+
+
+def kl_divergence(p: Tensor, q: Tensor, log_prob: bool = False, reduction: Optional[str] = "mean") -> Tensor:
+    r"""Computes `KL divergence`_
+
+    .. math::
+        D_{KL}(P||Q) = \sum_{x\in\mathcal{X}} P(x) \log\frac{P(x)}{Q(x)}
+
+    Where :math:`P` and :math:`Q` are probability distributions where :math:`P` usually represents a distribution
+    over data and :math:`Q` is often a prior or approximation of :math:`P`. It should be noted that the KL divergence
+    is a non-symmetric metric i.e. :math:`D_{KL}(P||Q) \neq D_{KL}(Q||P)`.
+
+    Args:
+        p: data distribution with shape ``[N, d]``
+        q: prior or approximate distribution with shape ``[N, d]``
+        log_prob: bool indicating if input is log-probabilities or probabilities. If given as probabilities,
+            will normalize to make sure the distributions sum to 1
+        reduction:
+            Determines how to reduce over the ``N``/batch dimension:
+
+            - ``'mean'`` [default]: Averages score across samples
+            - ``'sum'``: Sum score across samples
+            - ``'none'`` or ``None``: Returns score per sample
+
+    Example:
+        >>> import paddleext.torchapi as B
+        >>> p = B.tensor([[0.36, 0.48, 0.16]])
+        >>> q = B.tensor([[1/3, 1/3, 1/3]])
+        >>> kl_divergence(p, q)
+        tensor(0.0853)
+    """
+    measures, total = _kld_update(p, q, log_prob)
+    return _kld_compute(measures, total, reduction)
diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/matthews_corrcoef.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/matthews_corrcoef.py
new file mode 100644
index 00000000..8532a358
--- /dev/null
+++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/matthews_corrcoef.py
@@ -0,0 +1,78 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import paddleext.torchapi as B
+from paddleext.torchapi import Tensor
+
+from paddlemetrics.functional.classification.confusion_matrix import _confusion_matrix_update
+
+_matthews_corrcoef_update = _confusion_matrix_update
+
+
+def _matthews_corrcoef_compute(confmat: Tensor) -> Tensor:
+    """Computes Matthews correlation coefficient.
+
+    Args:
+        confmat: Confusion matrix
+
+    Example:
+        >>> target = B.tensor([1, 1, 0, 0])
+        >>> preds = B.tensor([0, 1, 0, 0])
+        >>> confmat = _matthews_corrcoef_update(preds, target, num_classes=2)
+        >>> _matthews_corrcoef_compute(confmat)
+        tensor(0.5774)
+    """
+
+    tk = confmat.sum(dim=1).float()
+    pk = confmat.sum(dim=0).float()
+    c = B.trace(confmat).float()
+    s = confmat.sum().float()
+    return (c * s - sum(tk * pk)) / (B.sqrt(s ** 2 - sum(pk * pk)) * B.sqrt(s ** 2 - sum(tk * tk)))
+
+
+def matthews_corrcoef(
+    preds: Tensor,
+    target: Tensor,
+    num_classes: int,
+    threshold: float = 0.5,
+) -> Tensor:
+    r"""
+    Calculates `Matthews correlation coefficient`_ that measures
+    the general correlation or quality of a classification. In the binary case it
+    is defined as:
+
+    .. math::
+        MCC = \frac{TP*TN - FP*FN}{\sqrt{(TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)}}
+
+    where TP, TN, FP and FN are respectively the true positives, true negatives,
+    false positives and false negatives. Also works in the case of multi-label or
+    multi-class input.
+
+    Args:
+        preds: (float or long tensor), Either a ``(N, ...)`` tensor with labels or
+            ``(N, C, ...)`` where C is the number of classes, tensor with labels/probabilities
+        target: ``target`` (long tensor), tensor with shape ``(N, ...)`` with ground truth labels
+        num_classes: Number of classes in the dataset.
+        threshold:
+            Threshold value for binary or multi-label probabilities.
default: 0.5 + + Example: + >>> from paddlemetrics.functional import matthews_corrcoef + >>> target = B.tensor([1, 1, 0, 0]) + >>> preds = B.tensor([0, 1, 0, 0]) + >>> matthews_corrcoef(preds, target, num_classes=2) + tensor(0.5774) + + """ + confmat = _matthews_corrcoef_update(preds, target, num_classes, threshold) + return _matthews_corrcoef_compute(confmat) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/precision_recall.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/precision_recall.py new file mode 100644 index 00000000..4b8528dc --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/precision_recall.py @@ -0,0 +1,568 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional, Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.stat_scores import _reduce_stat_scores, _stat_scores_update +from paddlemetrics.utilities.enums import AverageMethod, MDMCAverageMethod + + +def _precision_compute( + tp: Tensor, + fp: Tensor, + fn: Tensor, + average: str, + mdmc_average: Optional[str], +) -> Tensor: + """Computes precision from the stat scores: true positives, false positives, true negatives, false negatives. + + Args: + tp: True positives + fp: False positives + fn: False negatives + average: Defines the reduction that is applied + mdmc_average: Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter) + + Example: + >>> from paddlemetrics.functional.classification.stat_scores import _stat_scores_update + >>> preds = B.tensor([2, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> tp, fp, tn, fn = _stat_scores_update( preds, target, reduce='macro', num_classes=3) + >>> _precision_compute(tp, fp, fn, average='macro', mdmc_average=None) + tensor(0.1667) + >>> tp, fp, tn, fn = _stat_scores_update(preds, target, reduce='micro') + >>> _precision_compute(tp, fp, fn, average='micro', mdmc_average=None) + tensor(0.2500) + """ + + numerator = tp + denominator = tp + fp + + if average == AverageMethod.MACRO and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + cond = tp + fp + fn == 0 + numerator = numerator[~cond] + denominator = denominator[~cond] + + if average == AverageMethod.NONE and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + # a class is not present if there exists no TPs, no FPs, and no FNs + meaningless_indeces = B.nonzero((tp | fn | fp) == 0).cpu() + numerator[meaningless_indeces, ...] = -1 + denominator[meaningless_indeces, ...] 
= -1 + + return _reduce_stat_scores( + numerator=numerator, + denominator=denominator, + weights=None if average != "weighted" else tp + fn, + average=average, + mdmc_average=mdmc_average, + ) + + +def precision( + preds: Tensor, + target: Tensor, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + num_classes: Optional[int] = None, + threshold: float = 0.5, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, +) -> Tensor: + r""" + Computes `Precision`_ + + .. math:: \text{Precision} = \frac{\text{TP}}{\text{TP} + \text{FP}} + + Where :math:`\text{TP}` and :math:`\text{FP}` represent the number of true positives and + false positives respecitively. With the use of ``top_k`` parameter, this metric can + generalize to Precision@K. + + The reduction method (how the precision scores are aggregated) is controlled by the + ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the + multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + preds: Predictions from model (probabilities, logits or labels) + target: Ground truth values + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + .. note:: If ``'none'`` and a given class doesn't occur in the `preds` or `target`, + the value for the class will be ``nan``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. 
+ + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + Return: + The shape of the returned tensor depends on the ``average`` parameter + + - If ``average in ['micro', 'macro', 'weighted', 'samples']``, a one-element tensor will be returned + - If ``average in ['none', None]``, the shape will be ``(C,)``, where ``C`` stands for the number + of classes + + Raises: + ValueError: + If ``average`` is not one of ``"micro"``, ``"macro"``, ``"weighted"``, + ``"samples"``, ``"none"`` or ``None``. + ValueError: + If ``mdmc_average`` is not one of ``None``, ``"samplewise"``, ``"global"``. + ValueError: + If ``average`` is set but ``num_classes`` is not provided. + ValueError: + If ``num_classes`` is set + and ``ignore_index`` is not in the range ``[0, num_classes)``. + + Example: + >>> from paddlemetrics.functional import precision + >>> preds = B.tensor([2, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> precision(preds, target, average='macro', num_classes=3) + tensor(0.1667) + >>> precision(preds, target, average='micro') + tensor(0.2500) + + """ + allowed_average = ["micro", "macro", "weighted", "samples", "none", None] + if average not in allowed_average: + raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") + + allowed_mdmc_average = [None, "samplewise", "global"] + if mdmc_average not in allowed_mdmc_average: + raise ValueError(f"The `mdmc_average` has to be one of {allowed_mdmc_average}, got {mdmc_average}.") + + if average in ["macro", "weighted", "none", None] and (not num_classes or num_classes < 1): + raise ValueError(f"When you set `average` as {average}, you have to provide the number of classes.") + + if num_classes and ignore_index is not None and (not 0 <= ignore_index < num_classes or num_classes == 1): + raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {num_classes} classes") + + reduce = "macro" if average in ["weighted", "none", None] else average + tp, fp, _, fn = _stat_scores_update( + preds, + target, + reduce=reduce, + mdmc_reduce=mdmc_average, + threshold=threshold, + num_classes=num_classes, + top_k=top_k, + multiclass=multiclass, + ignore_index=ignore_index, + ) + + return _precision_compute(tp, fp, fn, average, mdmc_average) + + +def _recall_compute( + tp: Tensor, + fp: Tensor, + fn: Tensor, + average: str, + mdmc_average: Optional[str], +) -> Tensor: + """Computes precision from the stat scores: true positives, false positives, true negatives, false negatives. 
+ + Args: + tp: True positives + fp: False positives + fn: False negatives + average: Defines the reduction that is applied + mdmc_average: Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter) + + Example: + >>> from paddlemetrics.functional.classification.stat_scores import _stat_scores_update + >>> preds = B.tensor([2, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> tp, fp, tn, fn = _stat_scores_update(preds, target, reduce='macro', num_classes=3) + >>> _recall_compute(tp, fp, fn, average='macro', mdmc_average=None) + tensor(0.3333) + >>> tp, fp, tn, fn = _stat_scores_update(preds, target, reduce='micro') + >>> _recall_compute(tp, fp, fn, average='micro', mdmc_average=None) + tensor(0.2500) + """ + numerator = tp + denominator = tp + fn + + if average == AverageMethod.MACRO and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + cond = tp + fp + fn == 0 + numerator = numerator[~cond] + denominator = denominator[~cond] + + if average == AverageMethod.NONE and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + # a class is not present if there exists no TPs, no FPs, and no FNs + meaningless_indeces = ((tp | fn | fp) == 0).nonzero().cpu() + numerator[meaningless_indeces, ...] = -1 + denominator[meaningless_indeces, ...] = -1 + + return _reduce_stat_scores( + numerator=numerator, + denominator=denominator, + weights=None if average != AverageMethod.WEIGHTED else tp + fn, + average=average, + mdmc_average=mdmc_average, + ) + + +def recall( + preds: Tensor, + target: Tensor, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + num_classes: Optional[int] = None, + threshold: float = 0.5, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, +) -> Tensor: + r""" + Computes `Recall`_ + + .. math:: \text{Recall} = \frac{\text{TP}}{\text{TP} + \text{FN}} + + Where :math:`\text{TP}` and :math:`\text{FN}` represent the number of true positives and + false negatives respecitively. With the use of ``top_k`` parameter, this metric can + generalize to Recall@K. + + The reduction method (how the recall scores are aggregated) is controlled by the + ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the + multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + preds: Predictions from model (probabilities, logits or labels) + target: Ground truth values + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + .. note:: If ``'none'`` and a given class doesn't occur in the `preds` or `target`, + the value for the class will be ``nan``. 
+ + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + Return: + The shape of the returned tensor depends on the ``average`` parameter + + - If ``average in ['micro', 'macro', 'weighted', 'samples']``, a one-element tensor will be returned + - If ``average in ['none', None]``, the shape will be ``(C,)``, where ``C`` stands for the number + of classes + + Raises: + ValueError: + If ``average`` is not one of ``"micro"``, ``"macro"``, ``"weighted"``, + ``"samples"``, ``"none"`` or ``None``. + ValueError: + If ``mdmc_average`` is not one of ``None``, ``"samplewise"``, ``"global"``. + ValueError: + If ``average`` is set but ``num_classes`` is not provided. + ValueError: + If ``num_classes`` is set + and ``ignore_index`` is not in the range ``[0, num_classes)``. 
+
+    Example:
+        >>> from paddlemetrics.functional import recall
+        >>> preds = B.tensor([2, 0, 2, 1])
+        >>> target = B.tensor([1, 1, 2, 0])
+        >>> recall(preds, target, average='macro', num_classes=3)
+        tensor(0.3333)
+        >>> recall(preds, target, average='micro')
+        tensor(0.2500)
+
+    """
+    allowed_average = ["micro", "macro", "weighted", "samples", "none", None]
+    if average not in allowed_average:
+        raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.")
+
+    allowed_mdmc_average = [None, "samplewise", "global"]
+    if mdmc_average not in allowed_mdmc_average:
+        raise ValueError(f"The `mdmc_average` has to be one of {allowed_mdmc_average}, got {mdmc_average}.")
+
+    if average in ["macro", "weighted", "none", None] and (not num_classes or num_classes < 1):
+        raise ValueError(f"When you set `average` as {average}, you have to provide the number of classes.")
+
+    if num_classes and ignore_index is not None and (not 0 <= ignore_index < num_classes or num_classes == 1):
+        raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {num_classes} classes")
+
+    reduce = "macro" if average in ["weighted", "none", None] else average
+    tp, fp, _, fn = _stat_scores_update(
+        preds,
+        target,
+        reduce=reduce,
+        mdmc_reduce=mdmc_average,
+        threshold=threshold,
+        num_classes=num_classes,
+        top_k=top_k,
+        multiclass=multiclass,
+        ignore_index=ignore_index,
+    )
+
+    return _recall_compute(tp, fp, fn, average, mdmc_average)
+
+
+def precision_recall(
+    preds: Tensor,
+    target: Tensor,
+    average: str = "micro",
+    mdmc_average: Optional[str] = None,
+    ignore_index: Optional[int] = None,
+    num_classes: Optional[int] = None,
+    threshold: float = 0.5,
+    top_k: Optional[int] = None,
+    multiclass: Optional[bool] = None,
+) -> Tuple[Tensor, Tensor]:
+    r"""
+    Computes `Precision`_ and `Recall`_
+
+    .. math:: \text{Precision} = \frac{\text{TP}}{\text{TP} + \text{FP}}
+
+
+    .. math:: \text{Recall} = \frac{\text{TP}}{\text{TP} + \text{FN}}
+
+    Where :math:`\text{TP}`, :math:`\text{FN}` and :math:`\text{FP}` represent the number
+    of true positives, false negatives and false positives respectively. With the use of
+    ``top_k`` parameter, this metric can generalize to Recall@K and Precision@K.
+
+    The reduction method (how the recall scores are aggregated) is controlled by the
+    ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the
+    multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`.
+
+    Args:
+        preds: Predictions from model (probabilities, logits or labels)
+        target: Ground truth values
+        average:
+            Defines the reduction that is applied. Should be one of the following:
+
+            - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes.
+            - ``'macro'``: Calculate the metric for each class separately, and average the
+              metrics across classes (with equal weights for each class).
+            - ``'weighted'``: Calculate the metric for each class separately, and average the
+              metrics across classes, weighting each class by its support (``tp + fn``).
+            - ``'none'`` or ``None``: Calculate the metric for each class separately, and return
+              the metric for every class.
+            - ``'samples'``: Calculate the metric for each sample, and average the metrics
+              across samples (with equal weights for each sample).
+
+            .. note:: What is considered a sample in the multi-dimensional multi-class case
+                depends on the value of ``mdmc_average``.
+
+            ..
note:: If ``'none'`` and a given class doesn't occur in the `preds` or `target`, + the value for the class will be ``nan``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + Return: + The function returns a tuple with two elements: precision and recall. Their shape + depends on the ``average`` parameter + + - If ``average in ['micro', 'macro', 'weighted', 'samples']``, they are a single element tensor + - If ``average in ['none', None]``, they are a tensor of shape ``(C, )``, where ``C`` stands for + the number of classes + + Raises: + ValueError: + If ``average`` is not one of ``"micro"``, ``"macro"``, ``"weighted"``, + ``"samples"``, ``"none"`` or ``None``. + ValueError: + If ``mdmc_average`` is not one of ``None``, ``"samplewise"``, ``"global"``. + ValueError: + If ``average`` is set but ``num_classes`` is not provided. + ValueError: + If ``num_classes`` is set + and ``ignore_index`` is not in the range ``[0, num_classes)``. 
+ + Example: + >>> from paddlemetrics.functional import precision_recall + >>> preds = B.tensor([2, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> precision_recall(preds, target, average='macro', num_classes=3) + (tensor(0.1667), tensor(0.3333)) + >>> precision_recall(preds, target, average='micro') + (tensor(0.2500), tensor(0.2500)) + + """ + allowed_average = ["micro", "macro", "weighted", "samples", "none", None] + if average not in allowed_average: + raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") + + allowed_mdmc_average = [None, "samplewise", "global"] + if mdmc_average not in allowed_mdmc_average: + raise ValueError("The `mdmc_average` has to be one of {allowed_mdmc_average}, got {mdmc_average}.") + + if average in ["macro", "weighted", "none", None] and (not num_classes or num_classes < 1): + raise ValueError(f"When you set `average` as {average}, you have to provide the number of classes.") + + if num_classes and ignore_index is not None and (not 0 <= ignore_index < num_classes or num_classes == 1): + raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {num_classes} classes") + + reduce = "macro" if average in ["weighted", "none", None] else average + tp, fp, _, fn = _stat_scores_update( + preds, + target, + reduce=reduce, + mdmc_reduce=mdmc_average, + threshold=threshold, + num_classes=num_classes, + top_k=top_k, + multiclass=multiclass, + ignore_index=ignore_index, + ) + + precision_ = _precision_compute(tp, fp, fn, average, mdmc_average) + recall_ = _recall_compute(tp, fp, fn, average, mdmc_average) + + return precision_, recall_ diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/precision_recall_curve.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/precision_recall_curve.py new file mode 100644 index 00000000..11b32500 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/precision_recall_curve.py @@ -0,0 +1,332 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
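+
+# Usage sketch for the helpers defined in this module (assuming `precision_recall_curve` is
+# exported from `paddlemetrics.functional` like the other functional metrics; the tensor values
+# are illustrative only):
+#
+#     import paddleext.torchapi as B
+#     from paddlemetrics.functional import precision_recall_curve
+#
+#     preds = B.tensor([0.1, 0.4, 0.35, 0.8])
+#     target = B.tensor([0, 0, 1, 1])
+#     precision, recall, thresholds = precision_recall_curve(preds, target)
+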
+from typing import List, Optional, Sequence, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities import rank_zero_warn + + +def _binary_clf_curve( + preds: Tensor, + target: Tensor, + sample_weights: Optional[Sequence] = None, + pos_label: int = 1, +) -> Tuple[Tensor, Tensor, Tensor]: + """adapted from https://github.com/scikit-learn/scikit- learn/blob/master/sklearn/metrics/_ranking.py.""" + if sample_weights is not None and not isinstance(sample_weights, Tensor): + sample_weights = tensor(sample_weights, device=preds.device, dtype=B.float) + + # remove class dimension if necessary + if preds.ndim > target.ndim: + preds = preds[:, 0] + desc_score_indices = B.argsort(preds, descending=True) + + preds = preds[desc_score_indices] + target = target[desc_score_indices] + + if sample_weights is not None: + weight = sample_weights[desc_score_indices] + else: + weight = 1.0 + + # pred typically has many tied values. Here we extract + # the indices associated with the distinct values. We also + # concatenate a value for the end of the curve. + distinct_value_indices = B.where(preds[1:] - preds[:-1])[0] + threshold_idxs = B.nn.functional.pad(distinct_value_indices, [0, 1], value=target.size(0) - 1) + target = (target == pos_label).to(B.long) + tps = B.cumsum(target * weight, dim=0)[threshold_idxs] + + if sample_weights is not None: + # express fps as a cumsum to ensure fps is increasing even in + # the presence of floating point errors + fps = B.cumsum((1 - target) * weight, dim=0)[threshold_idxs] + else: + fps = 1 + threshold_idxs - tps + + return fps, tps, preds[threshold_idxs] + + +def _precision_recall_curve_update( + preds: Tensor, + target: Tensor, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, +) -> Tuple[Tensor, Tensor, int, Optional[int]]: + """Updates and returns variables required to compute the precision-recall pairs for different thresholds. + + Args: + preds: Predicted tensor + target: Ground truth tensor + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + """ + + if len(preds.shape) == len(target.shape): + if pos_label is None: + pos_label = 1 + if num_classes is not None and num_classes != 1: + # multilabel problem + if num_classes != preds.shape[1]: + raise ValueError( + f"Argument `num_classes` was set to {num_classes} in" + f" metric `precision_recall_curve` but detected {preds.shape[1]}" + " number of classes from predictions" + ) + preds = preds.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1) + target = target.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1) + else: + # binary problem + preds = preds.flatten() + target = target.flatten() + num_classes = 1 + + # multi class problem + elif len(preds.shape) == len(target.shape) + 1: + if pos_label is not None: + rank_zero_warn( + "Argument `pos_label` should be `None` when running" + f" multiclass precision recall curve. 
Got {pos_label}" + ) + if num_classes != preds.shape[1]: + raise ValueError( + f"Argument `num_classes` was set to {num_classes} in" + f" metric `precision_recall_curve` but detected {preds.shape[1]}" + " number of classes from predictions" + ) + preds = preds.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1) + target = target.flatten() + + else: + raise ValueError("preds and target must have same number of dimensions, or one additional dimension for preds") + + return preds, target, num_classes, pos_label + + +def _precision_recall_curve_compute_single_class( + preds: Tensor, + target: Tensor, + pos_label: int, + sample_weights: Optional[Sequence] = None, +) -> Tuple[Tensor, Tensor, Tensor]: + """Computes precision-recall pairs for single class inputs. + + Args: + preds: Predicted tensor + target: Ground truth tensor + pos_label: integer determining the positive class. + sample_weights: sample weights for each data point + """ + + fps, tps, thresholds = _binary_clf_curve( + preds=preds, target=target, sample_weights=sample_weights, pos_label=pos_label + ) + precision = tps / (tps + fps) + recall = tps / tps[-1] + + # stop when full recall attained and reverse the outputs so recall is decreasing + last_ind = B.where(tps == tps[-1])[0][0] + sl = slice(0, last_ind.item() + 1) + + # need to call reversed explicitly, since including that to slice would + # introduce negative strides that are not yet supported in pytorch + precision = B.cat([reversed(precision[sl]), B.ones(1, dtype=precision.dtype, device=precision.device)]) + + recall = B.cat([reversed(recall[sl]), B.zeros(1, dtype=recall.dtype, device=recall.device)]) + + thresholds = reversed(thresholds[sl]).detach().clone() # type: ignore + + return precision, recall, thresholds + + +def _precision_recall_curve_compute_multi_class( + preds: Tensor, + target: Tensor, + num_classes: int, + sample_weights: Optional[Sequence] = None, +) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]: + """Computes precision-recall pairs for multi class inputs. + + Args: + preds: Predicted tensor + target: Ground truth tensor + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + sample_weights: sample weights for each data point + """ + + # Recursively call per class + precision, recall, thresholds = [], [], [] + for cls in range(num_classes): + preds_cls = preds[:, cls] + + prc_args = dict( + preds=preds_cls, + target=target, + num_classes=1, + pos_label=cls, + sample_weights=sample_weights, + ) + if target.ndim > 1: + prc_args.update( + dict( + target=target[:, cls], + pos_label=1, + ) + ) + res = precision_recall_curve(**prc_args) + precision.append(res[0]) + recall.append(res[1]) + thresholds.append(res[2]) + + return precision, recall, thresholds + + +def _precision_recall_curve_compute( + preds: Tensor, + target: Tensor, + num_classes: int, + pos_label: Optional[int] = None, + sample_weights: Optional[Sequence] = None, +) -> Union[Tuple[Tensor, Tensor, Tensor], Tuple[List[Tensor], List[Tensor], List[Tensor]]]: + """Computes precision-recall pairs based on the number of classes. + + Args: + preds: Predicted tensor + target: Ground truth tensor + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. 
For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + sample_weights: sample weights for each data point + + Example: + >>> # binary case + >>> preds = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 0]) + >>> pos_label = 1 + >>> preds, target, num_classes, pos_label = _precision_recall_curve_update(preds, target, pos_label=pos_label) + >>> precision, recall, thresholds = _precision_recall_curve_compute(preds, target, num_classes, pos_label) + >>> precision + tensor([0.6667, 0.5000, 0.0000, 1.0000]) + >>> recall + tensor([1.0000, 0.5000, 0.0000, 0.0000]) + >>> thresholds + tensor([1, 2, 3]) + + >>> # multiclass case + >>> preds = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> num_classes = 5 + >>> preds, target, num_classes, pos_label = _precision_recall_curve_update(preds, target, num_classes) + >>> precision, recall, thresholds = _precision_recall_curve_compute(preds, target, num_classes) + >>> precision # doctest: +NORMALIZE_WHITESPACE + [tensor([1., 1.]), tensor([1., 1.]), tensor([0.2500, 0.0000, 1.0000]), + tensor([0.2500, 0.0000, 1.0000]), tensor([0., 1.])] + >>> recall + [tensor([1., 0.]), tensor([1., 0.]), tensor([1., 0., 0.]), tensor([1., 0., 0.]), tensor([nan, 0.])] + >>> thresholds + [tensor([0.7500]), tensor([0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500])] + """ + + with B.no_grad(): + if num_classes == 1: + if pos_label is None: + pos_label = 1 + return _precision_recall_curve_compute_single_class(preds, target, pos_label, sample_weights) + return _precision_recall_curve_compute_multi_class(preds, target, num_classes, sample_weights) + + +def precision_recall_curve( + preds: Tensor, + target: Tensor, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + sample_weights: Optional[Sequence] = None, +) -> Union[Tuple[Tensor, Tensor, Tensor], Tuple[List[Tensor], List[Tensor], List[Tensor]]]: + """Computes precision-recall pairs for different thresholds. + + Args: + preds: predictions from model (probabilities) + target: ground truth labels + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + sample_weights: sample weights for each data point + + Returns: + 3-element tuple containing + + precision: + tensor where element i is the precision of predictions with + score >= thresholds[i] and the last element is 1. + If multiclass, this is a list of such tensors, one for each class. + recall: + tensor where element i is the recall of predictions with + score >= thresholds[i] and the last element is 0. + If multiclass, this is a list of such tensors, one for each class. + thresholds: + Thresholds used for computing precision/recall scores + + Raises: + ValueError: + If ``preds`` and ``target`` don't have the same number of dimensions, + or one additional dimension for ``preds``. + ValueError: + If the number of classes deduced from ``preds`` is not the same as the + ``num_classes`` provided. 
+ + Example (binary case): + >>> from paddlemetrics.functional import precision_recall_curve + >>> pred = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 0]) + >>> precision, recall, thresholds = precision_recall_curve(pred, target, pos_label=1) + >>> precision + tensor([0.6667, 0.5000, 0.0000, 1.0000]) + >>> recall + tensor([1.0000, 0.5000, 0.0000, 0.0000]) + >>> thresholds + tensor([1, 2, 3]) + + Example (multiclass case): + >>> pred = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> precision, recall, thresholds = precision_recall_curve(pred, target, num_classes=5) + >>> precision # doctest: +NORMALIZE_WHITESPACE + [tensor([1., 1.]), tensor([1., 1.]), tensor([0.2500, 0.0000, 1.0000]), + tensor([0.2500, 0.0000, 1.0000]), tensor([0., 1.])] + >>> recall + [tensor([1., 0.]), tensor([1., 0.]), tensor([1., 0., 0.]), tensor([1., 0., 0.]), tensor([nan, 0.])] + >>> thresholds + [tensor([0.7500]), tensor([0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500])] + """ + preds, target, num_classes, pos_label = _precision_recall_curve_update(preds, target, num_classes, pos_label) + return _precision_recall_curve_compute(preds, target, num_classes, pos_label, sample_weights) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/roc.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/roc.py new file mode 100644 index 00000000..86f4e2a4 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/roc.py @@ -0,0 +1,273 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import List, Optional, Sequence, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.precision_recall_curve import ( + _binary_clf_curve, + _precision_recall_curve_update, +) + + +def _roc_update( + preds: Tensor, + target: Tensor, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, +) -> Tuple[Tensor, Tensor, int, Optional[int]]: + """Updates and returns variables required to compute the Receiver Operating Characteristic. + + Args: + preds: Predicted tensor + target: Ground truth tensor + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. 
For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + """ + + return _precision_recall_curve_update(preds, target, num_classes, pos_label) + + +def _roc_compute_single_class( + preds: Tensor, + target: Tensor, + pos_label: int, + sample_weights: Optional[Sequence] = None, +) -> Tuple[Tensor, Tensor, Tensor]: + """Computes Receiver Operating Characteristic for single class inputs. Returns tensor with false positive + rates, tensor with true positive rates, tensor with thresholds used for computing false- and true postive + rates. + + Args: + preds: Predicted tensor + target: Ground truth tensor + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + sample_weights: sample weights for each data point + """ + + fps, tps, thresholds = _binary_clf_curve( + preds=preds, target=target, sample_weights=sample_weights, pos_label=pos_label + ) + # Add an extra threshold position to make sure that the curve starts at (0, 0) + tps = B.cat([B.zeros(1, dtype=tps.dtype, device=tps.device), tps]) + fps = B.cat([B.zeros(1, dtype=fps.dtype, device=fps.device), fps]) + thresholds = B.cat([thresholds[0][None] + 1, thresholds]) + + if fps[-1] <= 0: + raise ValueError("No negative samples in targets, false positive value should be meaningless") + fpr = fps / fps[-1] + + if tps[-1] <= 0: + raise ValueError("No positive samples in targets, true positive value should be meaningless") + tpr = tps / tps[-1] + + return fpr, tpr, thresholds + + +def _roc_compute_multi_class( + preds: Tensor, + target: Tensor, + num_classes: int, + sample_weights: Optional[Sequence] = None, +) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]: + """Computes Receiver Operating Characteristic for multi class inputs. Returns tensor with false positive rates, + tensor with true positive rates, tensor with thresholds used for computing false- and true postive rates. + + Args: + preds: Predicted tensor + target: Ground truth tensor + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + sample_weights: sample weights for each data point + """ + + fpr, tpr, thresholds = [], [], [] + for cls in range(num_classes): + if preds.shape == target.shape: + target_cls = target[:, cls] + pos_label = 1 + else: + target_cls = target + pos_label = cls + res = roc( + preds=preds[:, cls], + target=target_cls, + num_classes=1, + pos_label=pos_label, + sample_weights=sample_weights, + ) + fpr.append(res[0]) + tpr.append(res[1]) + thresholds.append(res[2]) + + return fpr, tpr, thresholds + + +def _roc_compute( + preds: Tensor, + target: Tensor, + num_classes: int, + pos_label: Optional[int] = None, + sample_weights: Optional[Sequence] = None, +) -> Union[Tuple[Tensor, Tensor, Tensor], Tuple[List[Tensor], List[Tensor], List[Tensor]]]: + """Computes Receiver Operating Characteristic based on the number of classes. + + Args: + preds: Predicted tensor + target: Ground truth tensor + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. 
For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + sample_weights: sample weights for each data point + + Example: + >>> # binary case + >>> preds = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 1]) + >>> pos_label = 1 + >>> preds, target, num_classes, pos_label = _roc_update(preds, target, pos_label=pos_label) + >>> fpr, tpr, thresholds = _roc_compute(preds, target, num_classes, pos_label) + >>> fpr + tensor([0., 0., 0., 0., 1.]) + >>> tpr + tensor([0.0000, 0.3333, 0.6667, 1.0000, 1.0000]) + >>> thresholds + tensor([4, 3, 2, 1, 0]) + + >>> # multiclass case + >>> preds = B.tensor([[0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05], + ... [0.05, 0.05, 0.05, 0.75]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> num_classes = 4 + >>> preds, target, num_classes, pos_label = _roc_update(preds, target, num_classes) + >>> fpr, tpr, thresholds = _roc_compute(preds, target, num_classes) + >>> fpr + [tensor([0., 0., 1.]), tensor([0., 0., 1.]), tensor([0.0000, 0.3333, 1.0000]), tensor([0.0000, 0.3333, 1.0000])] + >>> tpr + [tensor([0., 1., 1.]), tensor([0., 1., 1.]), tensor([0., 0., 1.]), tensor([0., 0., 1.])] + >>> thresholds # doctest: +NORMALIZE_WHITESPACE + [tensor([1.7500, 0.7500, 0.0500]), + tensor([1.7500, 0.7500, 0.0500]), + tensor([1.7500, 0.7500, 0.0500]), + tensor([1.7500, 0.7500, 0.0500])] + """ + + with B.no_grad(): + if num_classes == 1 and preds.ndim == 1: # binary + if pos_label is None: + pos_label = 1 + return _roc_compute_single_class(preds, target, pos_label, sample_weights) + return _roc_compute_multi_class(preds, target, num_classes, sample_weights) + + +def roc( + preds: Tensor, + target: Tensor, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + sample_weights: Optional[Sequence] = None, +) -> Union[Tuple[Tensor, Tensor, Tensor], Tuple[List[Tensor], List[Tensor], List[Tensor]]]: + """Computes the Receiver Operating Characteristic (ROC). Works with both binary, multiclass and multilabel + input. + + Args: + preds: predictions from model (logits or probabilities) + target: ground truth values + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + sample_weights: sample weights for each data point + + Returns: + 3-element tuple containing + + fpr: + tensor with false positive rates. + If multiclass or multilabel, this is a list of such tensors, one for each class/label. + tpr: + tensor with true positive rates. + If multiclass or multilabel, this is a list of such tensors, one for each class/label. + thresholds: + tensor with thresholds used for computing false- and true postive rates + If multiclass or multilabel, this is a list of such tensors, one for each class/label. 
+ + Example (binary case): + >>> from paddlemetrics.functional import roc + >>> pred = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 1]) + >>> fpr, tpr, thresholds = roc(pred, target, pos_label=1) + >>> fpr + tensor([0., 0., 0., 0., 1.]) + >>> tpr + tensor([0.0000, 0.3333, 0.6667, 1.0000, 1.0000]) + >>> thresholds + tensor([4, 3, 2, 1, 0]) + + Example (multiclass case): + >>> from paddlemetrics.functional import roc + >>> pred = B.tensor([[0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05], + ... [0.05, 0.05, 0.05, 0.75]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> fpr, tpr, thresholds = roc(pred, target, num_classes=4) + >>> fpr + [tensor([0., 0., 1.]), tensor([0., 0., 1.]), tensor([0.0000, 0.3333, 1.0000]), tensor([0.0000, 0.3333, 1.0000])] + >>> tpr + [tensor([0., 1., 1.]), tensor([0., 1., 1.]), tensor([0., 0., 1.]), tensor([0., 0., 1.])] + >>> thresholds # doctest: +NORMALIZE_WHITESPACE + [tensor([1.7500, 0.7500, 0.0500]), + tensor([1.7500, 0.7500, 0.0500]), + tensor([1.7500, 0.7500, 0.0500]), + tensor([1.7500, 0.7500, 0.0500])] + + Example (multilabel case): + >>> from paddlemetrics.functional import roc + >>> pred = B.tensor([[0.8191, 0.3680, 0.1138], + ... [0.3584, 0.7576, 0.1183], + ... [0.2286, 0.3468, 0.1338], + ... [0.8603, 0.0745, 0.1837]]) + >>> target = B.tensor([[1, 1, 0], [0, 1, 0], [0, 0, 0], [0, 1, 1]]) + >>> fpr, tpr, thresholds = roc(pred, target, num_classes=3, pos_label=1) + >>> fpr # doctest: +NORMALIZE_WHITESPACE + [tensor([0.0000, 0.3333, 0.3333, 0.6667, 1.0000]), + tensor([0., 0., 0., 1., 1.]), + tensor([0.0000, 0.0000, 0.3333, 0.6667, 1.0000])] + >>> tpr + [tensor([0., 0., 1., 1., 1.]), tensor([0.0000, 0.3333, 0.6667, 0.6667, 1.0000]), tensor([0., 1., 1., 1., 1.])] + >>> thresholds # doctest: +NORMALIZE_WHITESPACE + [tensor([1.8603, 0.8603, 0.8191, 0.3584, 0.2286]), + tensor([1.7576, 0.7576, 0.3680, 0.3468, 0.0745]), + tensor([1.1837, 0.1837, 0.1338, 0.1183, 0.1138])] + """ + preds, target, num_classes, pos_label = _roc_update(preds, target, num_classes, pos_label) + return _roc_compute(preds, target, num_classes, pos_label, sample_weights) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/specificity.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/specificity.py new file mode 100644 index 00000000..be87dce7 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/specificity.py @@ -0,0 +1,215 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
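The ``fpr``/``tpr`` pairs returned by ``roc`` above are what an AUROC computation integrates over. A rough sketch, using a hand-written trapezoidal rule rather than the package's own AUROC metric:

```python
import paddleext.torchapi as B
from paddlemetrics.functional import roc

preds = B.tensor([0.1, 0.6, 0.35, 0.8])
target = B.tensor([0, 1, 0, 1])

fpr, tpr, _ = roc(preds, target, pos_label=1)

# trapezoidal integration of tpr over fpr approximates the area under the curve
auc = ((fpr[1:] - fpr[:-1]) * (tpr[1:] + tpr[:-1]) / 2).sum()
```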
+from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.stat_scores import _reduce_stat_scores, _stat_scores_update +from paddlemetrics.utilities.enums import AverageMethod, MDMCAverageMethod + + +def _specificity_compute( + tp: Tensor, + fp: Tensor, + tn: Tensor, + fn: Tensor, + average: str, + mdmc_average: Optional[str], +) -> Tensor: + """Computes specificity from the stat scores: true positives, false positives, true negatives, false negatives. + + Args: + tp: True positives + fp: False positives + tn: True negatives + fn: False negatives + average: Defines the reduction that is applied + mdmc_average: Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter) + + Example: + >>> from paddlemetrics.functional.classification.stat_scores import _stat_scores_update + >>> preds = B.tensor([2, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> tp, fp, tn, fn = _stat_scores_update(preds, target, reduce='macro', num_classes=3) + >>> _specificity_compute(tp, fp, tn, fn, average='macro', mdmc_average=None) + tensor(0.6111) + >>> tp, fp, tn, fn = _stat_scores_update(preds, target, reduce='micro') + >>> _specificity_compute(tp, fp, tn, fn, average='micro', mdmc_average=None) + tensor(0.6250) + """ + + numerator = tn + denominator = tn + fp + if average == AverageMethod.NONE and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + # a class is not present if there exists no TPs, no FPs, and no FNs + meaningless_indeces = B.nonzero((tp | fn | fp) == 0).cpu() + numerator[meaningless_indeces, ...] = -1 + denominator[meaningless_indeces, ...] = -1 + return _reduce_stat_scores( + numerator=numerator, + denominator=denominator, + weights=None if average != AverageMethod.WEIGHTED else denominator, + average=average, + mdmc_average=mdmc_average, + ) + + +def specificity( + preds: Tensor, + target: Tensor, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + num_classes: Optional[int] = None, + threshold: float = 0.5, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, +) -> Tensor: + r""" + Computes `Specificity`_ + + .. math:: \text{Specificity} = \frac{\text{TN}}{\text{TN} + \text{FP}} + + Where :math:`\text{TN}` and :math:`\text{FP}` represent the number of true negatives and + false positives respecitively. With the use of ``top_k`` parameter, this metric can + generalize to Specificity@K. + + The reduction method (how the specificity scores are aggregated) is controlled by the + ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the + multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + preds: Predictions from model (probabilities, or labels) + target: Ground truth values + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tn + fp``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. 
+ - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + .. note:: If ``'none'`` and a given class doesn't occur in the `preds` or `target`, + the value for the class will be ``nan``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + + threshold: + Threshold probability value for transforming probability predictions to binary + (0,1) predictions, in the case of binary or multi-label inputs + top_k: + Number of highest probability entries for each sample to convert to 1s - relevant + only for inputs with probability predictions. If this parameter is set for multi-label + inputs, it will take precedence over ``threshold``. For (multi-dim) multi-class inputs, + this parameter defaults to 1. + + Should be left unset (``None``) for inputs with label predictions. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + Return: + The shape of the returned tensor depends on the ``average`` parameter + + - If ``average in ['micro', 'macro', 'weighted', 'samples']``, a one-element tensor will be returned + - If ``average in ['none', None]``, the shape will be ``(C,)``, where ``C`` stands for the number + of classes + + Raises: + ValueError: + If ``average`` is not one of ``"micro"``, ``"macro"``, ``"weighted"``, + ``"samples"``, ``"none"`` or ``None``. + ValueError: + If ``mdmc_average`` is not one of ``None``, ``"samplewise"``, ``"global"``. + ValueError: + If ``average`` is set but ``num_classes`` is not provided. + ValueError: + If ``num_classes`` is set + and ``ignore_index`` is not in the range ``[0, num_classes)``. 
+
+    Example:
+        >>> from paddlemetrics.functional import specificity
+        >>> preds = B.tensor([2, 0, 2, 1])
+        >>> target = B.tensor([1, 1, 2, 0])
+        >>> specificity(preds, target, average='macro', num_classes=3)
+        tensor(0.6111)
+        >>> specificity(preds, target, average='micro')
+        tensor(0.6250)
+
+    """
+
+    allowed_average = ["micro", "macro", "weighted", "samples", "none", None]
+    if average not in allowed_average:
+        raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.")
+
+    allowed_mdmc_average = [None, "samplewise", "global"]
+    if mdmc_average not in allowed_mdmc_average:
+        raise ValueError(f"The `mdmc_average` has to be one of {allowed_mdmc_average}, got {mdmc_average}.")
+
+    if average in ["macro", "weighted", "none", None] and (not num_classes or num_classes < 1):
+        raise ValueError(f"When you set `average` as {average}, you have to provide the number of classes.")
+
+    if num_classes and ignore_index is not None and (not 0 <= ignore_index < num_classes or num_classes == 1):
+        raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {num_classes} classes")
+
+    reduce = "macro" if average in ["weighted", "none", None] else average
+    tp, fp, tn, fn = _stat_scores_update(
+        preds,
+        target,
+        reduce=reduce,
+        mdmc_reduce=mdmc_average,
+        threshold=threshold,
+        num_classes=num_classes,
+        top_k=top_k,
+        multiclass=multiclass,
+        ignore_index=ignore_index,
+    )
+
+    return _specificity_compute(tp, fp, tn, fn, average, mdmc_average)
diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/stat_scores.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/stat_scores.py
new file mode 100644
index 00000000..33e1cafd
--- /dev/null
+++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/stat_scores.py
@@ -0,0 +1,396 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import List, Optional, Tuple, Union
+
+import paddleext.torchapi as B
+from paddleext.torchapi import Tensor, tensor
+
+from paddlemetrics.utilities.checks import _input_format_classification
+from paddlemetrics.utilities.enums import AverageMethod, MDMCAverageMethod
+
+
+def _del_column(data: Tensor, idx: int) -> Tensor:
+    """Delete the column at index."""
+    return B.cat([data[:, :idx], data[:, (idx + 1) :]], 1)
+
+
+def _stat_scores(
+    preds: Tensor,
+    target: Tensor,
+    reduce: Optional[str] = "micro",
+) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
+    """Calculate the number of tp, fp, tn, fn.
+
+    Args:
+        preds:
+            An ``(N, C)`` or ``(N, C, X)`` tensor of predictions (0 or 1)
+        target:
+            An ``(N, C)`` or ``(N, C, X)`` tensor of true labels (0 or 1)
+        reduce:
+            One of ``'micro'``, ``'macro'``, ``'samples'``
+
+    Return:
+        Returns a list of 4 tensors; tp, fp, tn, fn.
+ The shape of the returned tensors depnds on the shape of the inputs + and the ``reduce`` parameter: + + If inputs are of the shape ``(N, C)``, then + - If ``reduce='micro'``, the returned tensors are 1 element tensors + - If ``reduce='macro'``, the returned tensors are ``(C,)`` tensors + - If ``reduce'samples'``, the returned tensors are ``(N,)`` tensors + + If inputs are of the shape ``(N, C, X)``, then + - If ``reduce='micro'``, the returned tensors are ``(N,)`` tensors + - If ``reduce='macro'``, the returned tensors are ``(N,C)`` tensors + - If ``reduce='samples'``, the returned tensors are ``(N,X)`` tensors + """ + dim: Union[int, List[int]] = 1 # for "samples" + if reduce == "micro": + dim = [0, 1] if preds.ndim == 2 else [1, 2] + elif reduce == "macro": + dim = 0 if preds.ndim == 2 else 2 + + true_pred, false_pred = target == preds, target != preds + pos_pred, neg_pred = preds == 1, preds == 0 + + tp = (true_pred * pos_pred).sum(dim=dim) + fp = (false_pred * pos_pred).sum(dim=dim) + + tn = (true_pred * neg_pred).sum(dim=dim) + fn = (false_pred * neg_pred).sum(dim=dim) + return tp.long(), fp.long(), tn.long(), fn.long() + + +def _stat_scores_update( + preds: Tensor, + target: Tensor, + reduce: Optional[str] = "micro", + mdmc_reduce: Optional[str] = None, + num_classes: Optional[int] = None, + top_k: Optional[int] = None, + threshold: float = 0.5, + multiclass: Optional[bool] = None, + ignore_index: Optional[int] = None, +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + """Updates and returns the the number of true positives, false positives, true negatives, false negatives. + Raises ValueError if: + + - The `ignore_index` is not valid + - When `ignore_index` is used with binary data + - When inputs are multi-dimensional multi-class, and the `mdmc_reduce` parameter is not set + + Args: + preds: Predicted tensor + target: Ground truth tensor + reduce: Defines the reduction that is applied + mdmc_reduce: Defines how the multi-dimensional multi-class inputs are handeled + num_classes: Number of classes. Necessary for (multi-dimensional) multi-class or multi-label data. + top_k: Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs + threshold: Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities + multiclass: Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be + ignore_index: Specify a class (label) to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and + ``reduce='macro'``, the class statistics for the ignored class will all be returned + as ``-1``. 
+ """ + + preds, target, _ = _input_format_classification( + preds, target, threshold=threshold, num_classes=num_classes, multiclass=multiclass, top_k=top_k + ) + + if ignore_index is not None and not 0 <= ignore_index < preds.shape[1]: + raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {preds.shape[0]} classes") + + if ignore_index is not None and preds.shape[1] == 1: + raise ValueError("You can not use `ignore_index` with binary data.") + + if preds.ndim == 3: + if not mdmc_reduce: + raise ValueError( + "When your inputs are multi-dimensional multi-class, you have to set the `mdmc_reduce` parameter" + ) + if mdmc_reduce == "global": + preds = B.transpose(preds, 1, 2).reshape(-1, preds.shape[1]) + target = B.transpose(target, 1, 2).reshape(-1, target.shape[1]) + + # Delete what is in ignore_index, if applicable (and classes don't matter): + if ignore_index is not None and reduce != "macro": + preds = _del_column(preds, ignore_index) + target = _del_column(target, ignore_index) + + tp, fp, tn, fn = _stat_scores(preds, target, reduce=reduce) + + # Take care of ignore_index + if ignore_index is not None and reduce == "macro": + tp[..., ignore_index] = -1 + fp[..., ignore_index] = -1 + tn[..., ignore_index] = -1 + fn[..., ignore_index] = -1 + + return tp, fp, tn, fn + + +def _stat_scores_compute(tp: Tensor, fp: Tensor, tn: Tensor, fn: Tensor) -> Tensor: + """Computes the number of true positives, false positives, true negatives, false negatives. Concatenates the + input tensors along with the support into one output. + + Args: + tp: True positives + fp: False positives + tn: True negatives + fn: False negatives + + Example: + >>> preds = B.tensor([1, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> tp, fp, tn, fn = _stat_scores_update(preds, target, reduce='macro', num_classes=3) + >>> _stat_scores_compute(tp, fp, tn, fn) + tensor([[0, 1, 2, 1, 1], + [1, 1, 1, 1, 2], + [1, 0, 3, 0, 1]]) + >>> tp, fp, tn, fn = _stat_scores_update(preds, target, reduce='micro') + >>> _stat_scores_compute(tp, fp, tn, fn) + tensor([2, 2, 6, 2, 4]) + """ + stats = [ + tp.unsqueeze(-1), + fp.unsqueeze(-1), + tn.unsqueeze(-1), + fn.unsqueeze(-1), + tp.unsqueeze(-1) + fn.unsqueeze(-1), # support + ] + outputs: Tensor = B.cat(stats, -1) + outputs = B.where(outputs < 0, tensor(-1, device=outputs.device, dtype=outputs.dtype), outputs) + + return outputs + + +def _reduce_stat_scores( + numerator: Tensor, + denominator: Tensor, + weights: Optional[Tensor], + average: Optional[str], + mdmc_average: Optional[str], + zero_division: int = 0, +) -> Tensor: + """Reduces scores of type ``numerator/denominator`` or. + + ``weights * (numerator/denominator)``, if ``average='weighted'``. + + Args: + numerator: A tensor with numerator numbers. + denominator: A tensor with denominator numbers. If a denominator is + negative, the class will be ignored (if averaging), or its score + will be returned as ``nan`` (if ``average=None``). + If the denominator is zero, then ``zero_division`` score will be + used for those elements. + weights: A tensor of weights to be used if ``average='weighted'``. + average: The method to average the scores + mdmc_average: The method to average the scores if inputs were multi-dimensional multi-class (MDMC) + zero_division: The value to use for the score if denominator equals zero. 
+ """ + numerator, denominator = numerator.float(), denominator.float() + zero_div_mask = denominator == 0 + ignore_mask = denominator < 0 + + if weights is None: + weights = B.ones_like(denominator) + else: + weights = weights.float() + + numerator = B.where(zero_div_mask, tensor(float(zero_division), device=numerator.device), numerator) + denominator = B.where(zero_div_mask | ignore_mask, tensor(1.0, device=denominator.device), denominator) + weights = B.where(ignore_mask, tensor(0.0, device=weights.device), weights) + + if average not in (AverageMethod.MICRO, AverageMethod.NONE, None): + weights = weights / weights.sum(dim=-1, keepdim=True) + + scores = weights * (numerator / denominator) + + # This is in case where sum(weights) = 0, which happens if we ignore the only present class with average='weighted' + scores = B.where(B.isnan(scores), tensor(float(zero_division), device=scores.device), scores) + + if mdmc_average == MDMCAverageMethod.SAMPLEWISE: + scores = scores.mean(dim=0) + ignore_mask = ignore_mask.sum(dim=0).bool() + + if average in (AverageMethod.NONE, None): + scores = B.where(ignore_mask, tensor(float("nan"), device=scores.device), scores) + else: + scores = scores.sum() + + return scores + + +def stat_scores( + preds: Tensor, + target: Tensor, + reduce: str = "micro", + mdmc_reduce: Optional[str] = None, + num_classes: Optional[int] = None, + top_k: Optional[int] = None, + threshold: float = 0.5, + multiclass: Optional[bool] = None, + ignore_index: Optional[int] = None, +) -> Tensor: + r"""Computes the number of true positives, false positives, true negatives, false negatives. + Related to `Type I and Type II errors`_ + and the `confusion matrix`_. + + The reduction method (how the statistics are aggregated) is controlled by the + ``reduce`` parameter, and additionally by the ``mdmc_reduce`` parameter in the + multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + preds: Predictions from model (probabilities, logits or labels) + target: Ground truth values + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + + reduce: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Counts the statistics by summing over all [sample, class] + combinations (globally). Each statistic is represented by a single integer. + - ``'macro'``: Counts the statistics for each class separately (over all samples). + Each statistic is represented by a ``(C,)`` tensor. Requires ``num_classes`` + to be set. + - ``'samples'``: Counts the statistics for each sample separately (over all classes). + Each statistic is represented by a ``(N, )`` 1d tensor. + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_reduce``. + + num_classes: + Number of classes. Necessary for (multi-dimensional) multi-class or multi-label data. + + ignore_index: + Specify a class (label) to ignore. 
If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and + ``reduce='macro'``, the class statistics for the ignored class will all be returned + as ``-1``. + + mdmc_reduce: + Defines how the multi-dimensional multi-class inputs are handeled. Should be + one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class (see :ref:`references/modules:input types` for the definition of input types). + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then the outputs are concatenated together. In each + sample the extra axes ``...`` are flattened to become the sub-sample axis, and + statistics for each sample are computed by treating the sub-sample axis as the + ``N`` axis for that sample. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs are + flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``reduce`` parameter applies as usual. + + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + Return: + The metric returns a tensor of shape ``(..., 5)``, where the last dimension corresponds + to ``[tp, fp, tn, fn, sup]`` (``sup`` stands for support and equals ``tp + fn``). The + shape depends on the ``reduce`` and ``mdmc_reduce`` (in case of multi-dimensional + multi-class data) parameters: + + - If the data is not multi-dimensional multi-class, then + + - If ``reduce='micro'``, the shape will be ``(5, )`` + - If ``reduce='macro'``, the shape will be ``(C, 5)``, + where ``C`` stands for the number of classes + - If ``reduce='samples'``, the shape will be ``(N, 5)``, where ``N`` stands for + the number of samples + + - If the data is multi-dimensional multi-class and ``mdmc_reduce='global'``, then + + - If ``reduce='micro'``, the shape will be ``(5, )`` + - If ``reduce='macro'``, the shape will be ``(C, 5)`` + - If ``reduce='samples'``, the shape will be ``(N*X, 5)``, where ``X`` stands for + the product of sizes of all "extra" dimensions of the data (i.e. all dimensions + except for ``C`` and ``N``) + + - If the data is multi-dimensional multi-class and ``mdmc_reduce='samplewise'``, then + + - If ``reduce='micro'``, the shape will be ``(N, 5)`` + - If ``reduce='macro'``, the shape will be ``(N, C, 5)`` + - If ``reduce='samples'``, the shape will be ``(N, X, 5)`` + + Raises: + ValueError: + If ``reduce`` is none of ``"micro"``, ``"macro"`` or ``"samples"``. + ValueError: + If ``mdmc_reduce`` is none of ``None``, ``"samplewise"``, ``"global"``. + ValueError: + If ``reduce`` is set to ``"macro"`` and ``num_classes`` is not provided. + ValueError: + If ``num_classes`` is set + and ``ignore_index`` is not in the range ``[0, num_classes)``. + ValueError: + If ``ignore_index`` is used with ``binary data``. + ValueError: + If inputs are ``multi-dimensional multi-class`` and ``mdmc_reduce`` is not provided. 
+ + Example: + >>> from paddlemetrics.functional import stat_scores + >>> preds = B.tensor([1, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> stat_scores(preds, target, reduce='macro', num_classes=3) + tensor([[0, 1, 2, 1, 1], + [1, 1, 1, 1, 2], + [1, 0, 3, 0, 1]]) + >>> stat_scores(preds, target, reduce='micro') + tensor([2, 2, 6, 2, 4]) + + """ + if reduce not in ["micro", "macro", "samples"]: + raise ValueError(f"The `reduce` {reduce} is not valid.") + + if mdmc_reduce not in [None, "samplewise", "global"]: + raise ValueError(f"The `mdmc_reduce` {mdmc_reduce} is not valid.") + + if reduce == "macro" and (not num_classes or num_classes < 1): + raise ValueError("When you set `reduce` as 'macro', you have to provide the number of classes.") + + if num_classes and ignore_index is not None and (not 0 <= ignore_index < num_classes or num_classes == 1): + raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {num_classes} classes") + + tp, fp, tn, fn = _stat_scores_update( + preds, + target, + reduce=reduce, + mdmc_reduce=mdmc_reduce, + top_k=top_k, + threshold=threshold, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + ) + return _stat_scores_compute(tp, fp, tn, fn) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/image/__init__.py b/RE/paddlemetric/src/paddlemetrics/functional/image/__init__.py new file mode 100644 index 00000000..9fe64120 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/image/__init__.py @@ -0,0 +1,16 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddlemetrics.functional.image.gradients import image_gradients # noqa: F401 +from paddlemetrics.functional.image.psnr import psnr # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/functional/image/gradients.py b/RE/paddlemetric/src/paddlemetrics/functional/image/gradients.py new file mode 100644 index 00000000..abe1b08d --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/image/gradients.py @@ -0,0 +1,81 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
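The ``ignore_index`` behaviour described for ``stat_scores`` above (under ``reduce='macro'`` the statistics of the ignored class are reported as ``-1``) can be checked directly. A small sketch, reusing the toy data from the docstring example:

```python
import paddleext.torchapi as B
from paddlemetrics.functional import stat_scores

preds = B.tensor([1, 0, 2, 1])
target = B.tensor([1, 1, 2, 0])

# each row is [tp, fp, tn, fn, support]; the row for the ignored class 0 is all -1
print(stat_scores(preds, target, reduce="macro", num_classes=3, ignore_index=0))
```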
+from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + + +def _image_gradients_validate(img: Tensor) -> None: + """Validates whether img is a 4D torch Tensor.""" + + if not isinstance(img, Tensor): + raise TypeError(f"The `img` expects a value of type but got {type(img)}") + if img.ndim != 4: + raise RuntimeError(f"The `img` expects a 4D tensor but got {img.ndim}D tensor") + + +def _compute_image_gradients(img: Tensor) -> Tuple[Tensor, Tensor]: + """Computes image gradients (dy/dx) for a given image.""" + + batch_size, channels, height, width = img.shape + + dy = img[..., 1:, :] - img[..., :-1, :] + dx = img[..., :, 1:] - img[..., :, :-1] + + shapey = [batch_size, channels, 1, width] + dy = B.cat([dy, B.zeros(shapey, device=img.device, dtype=img.dtype)], dim=2) + dy = dy.view(img.shape) + + shapex = [batch_size, channels, height, 1] + dx = B.cat([dx, B.zeros(shapex, device=img.device, dtype=img.dtype)], dim=3) + dx = dx.view(img.shape) + + return dy, dx + + +def image_gradients(img: Tensor) -> Tuple[Tensor, Tensor]: + """Computes `Gradient Computation of Image`_ of a given image using finite difference. + + Args: + img: An ``(N, C, H, W)`` input tensor where C is the number of image channels + + Return: + Tuple of (dy, dx) with each gradient of shape ``[N, C, H, W]`` + + Raises: + TypeError: + If ``img`` is not of the type . + RuntimeError: + If ``img`` is not a 4D tensor. + + Example: + >>> from paddlemetrics.functional import image_gradients + >>> image = B.arange(0, 1*1*5*5, dtype=B.float32) + >>> image = B.reshape(image, (1, 1, 5, 5)) + >>> dy, dx = image_gradients(image) + >>> dy[0, 0, :, :] + tensor([[5., 5., 5., 5., 5.], + [5., 5., 5., 5., 5.], + [5., 5., 5., 5., 5.], + [5., 5., 5., 5., 5.], + [0., 0., 0., 0., 0.]]) + + .. note:: The implementation follows the 1-step finite difference method as followed + by the TF implementation. The values are organized such that the gradient of + [I(x+1, y)-[I(x, y)]] are at the (x, y) location + """ + _image_gradients_validate(img) + + return _compute_image_gradients(img) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/image/psnr.py b/RE/paddlemetric/src/paddlemetrics/functional/image/psnr.py new file mode 100644 index 00000000..2ffd6046 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/image/psnr.py @@ -0,0 +1,150 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities import rank_zero_warn, reduce + + +def _psnr_compute( + sum_squared_error: Tensor, + n_obs: Tensor, + data_range: Tensor, + base: float = 10.0, + reduction: str = "elementwise_mean", +) -> Tensor: + """Computes peak signal-to-noise ratio. + + Args: + sum_squared_error: Sum of square of errors over all observations + n_obs: Number of predictions or observations + data_range: + the range of the data. 
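The gradient convention used by ``image_gradients`` above (forward differences, zero-padded on the last row/column) also determines what ``dx`` looks like. A complementary sketch for the same 5x5 ramp image as in the docstring example (the printed values follow from the definition, not from running this port):

```python
import paddleext.torchapi as B
from paddlemetrics.functional import image_gradients

image = B.reshape(B.arange(0, 1 * 1 * 5 * 5, dtype=B.float32), (1, 1, 5, 5))
dy, dx = image_gradients(image)

# each row increases by 1 from left to right, so dx is 1 everywhere except the last column
print(dx[0, 0, :, :])
```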
If None, it is determined from the data (max - min). ``data_range`` must be given + when ``dim`` is not None. + base: a base of a logarithm to use (default: 10) + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + + Example: + >>> preds = B.tensor([[0.0, 1.0], [2.0, 3.0]]) + >>> target = B.tensor([[3.0, 2.0], [1.0, 0.0]]) + >>> data_range = target.max() - target.min() + >>> sum_squared_error, n_obs = _psnr_update(preds, target) + >>> _psnr_compute(sum_squared_error, n_obs, data_range) + tensor(2.5527) + """ + + psnr_base_e = 2 * B.log(data_range) - B.log(sum_squared_error / n_obs) + psnr_vals = psnr_base_e * (10 / B.log(tensor(base))) + return reduce(psnr_vals, reduction=reduction) + + +def _psnr_update( + preds: Tensor, + target: Tensor, + dim: Optional[Union[int, Tuple[int, ...]]] = None, +) -> Tuple[Tensor, Tensor]: + """Updates and returns variables required to compute peak signal-to-noise ratio. + + Args: + preds: Predicted tensor + target: Ground truth tensor + dim: + Dimensions to reduce PSNR scores over provided as either an integer or a list of integers. Default is + None meaning scores will be reduced across all dimensions. + """ + + if dim is None: + sum_squared_error = B.sum(B.pow(preds - target, 2)) + n_obs = tensor(target.numel(), device=target.device) + return sum_squared_error, n_obs + + diff = preds - target + sum_squared_error = B.sum(diff * diff, dim=dim) + + if isinstance(dim, int): + dim_list = [dim] + else: + dim_list = list(dim) + if not dim_list: + n_obs = tensor(target.numel(), device=target.device) + else: + n_obs = tensor(target.size(), device=target.device)[dim_list].prod() + n_obs = n_obs.expand_as(sum_squared_error) + + return sum_squared_error, n_obs + + +def psnr( + preds: Tensor, + target: Tensor, + data_range: Optional[float] = None, + base: float = 10.0, + reduction: str = "elementwise_mean", + dim: Optional[Union[int, Tuple[int, ...]]] = None, +) -> Tensor: + """Computes the peak signal-to-noise ratio. + + Args: + preds: estimated signal + target: groun truth signal + data_range: + the range of the data. If None, it is determined from the data (max - min). ``data_range`` must be given + when ``dim`` is not None. + base: a base of a logarithm to use (default: 10) + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + + dim: + Dimensions to reduce PSNR scores over provided as either an integer or a list of integers. Default is + None meaning scores will be reduced across all dimensions. + Return: + Tensor with PSNR score + + Raises: + ValueError: + If ``dim`` is not ``None`` and ``data_range`` is not provided. + + Example: + >>> from paddlemetrics.functional import psnr + >>> pred = B.tensor([[0.0, 1.0], [2.0, 3.0]]) + >>> target = B.tensor([[3.0, 2.0], [1.0, 0.0]]) + >>> psnr(pred, target) + tensor(2.5527) + + .. note:: + Half precision is only support on GPU for this metric + """ + if dim is None and reduction != "elementwise_mean": + rank_zero_warn(f"The `reduction={reduction}` will not have any effect when `dim` is None.") + + if data_range is None: + if dim is not None: + # Maybe we could use `B.amax(target, dim=dim) - B.amin(target, dim=dim)` in PyTorch 1.7 to calculate + # `data_range` in the future. 
+ raise ValueError("The `data_range` must be given when `dim` is not None.") + + data_range = target.max() - target.min() + else: + data_range = tensor(float(data_range)) + sum_squared_error, n_obs = _psnr_update(preds, target, dim=dim) + return _psnr_compute(sum_squared_error, n_obs, data_range, base=base, reduction=reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/image/ssim.py b/RE/paddlemetric/src/paddlemetrics/functional/image/ssim.py new file mode 100644 index 00000000..52af9b79 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/image/ssim.py @@ -0,0 +1,225 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional, Sequence, Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape +from paddlemetrics.utilities.distributed import reduce + + +def _gaussian(kernel_size: int, sigma: float, dtype: B.dtype, device: B.device) -> Tensor: + """Computes 1D gaussian kernel. + + Args: + kernel_size: size of the gaussian kernel + sigma: Standard deviation of the gaussian kernel + dtype: data type of the output tensor + device: device of the output tensor + + Example: + >>> _gaussian(3, 1, B.float, 'cpu') + tensor([[0.2741, 0.4519, 0.2741]]) + """ + dist = B.arange(start=(1 - kernel_size) / 2, end=(1 + kernel_size) / 2, step=1, dtype=dtype, device=device) + gauss = B.exp(-B.pow(dist / sigma, 2) / 2) + return (gauss / gauss.sum()).unsqueeze(dim=0) # (1, kernel_size) + + +def _gaussian_kernel( + channel: int, kernel_size: Sequence[int], sigma: Sequence[float], dtype: B.dtype, device: B.device +) -> Tensor: + """Computes 2D gaussian kernel. + + Args: + channel: number of channels in the image + kernel_size: size of the gaussian kernel as a tuple (h, w) + sigma: Standard deviation of the gaussian kernel + dtype: data type of the output tensor + device: device of the output tensor + + Example: + >>> _gaussian_kernel(1, (5,5), (1,1), B.float, "cpu") + tensor([[[[0.0030, 0.0133, 0.0219, 0.0133, 0.0030], + [0.0133, 0.0596, 0.0983, 0.0596, 0.0133], + [0.0219, 0.0983, 0.1621, 0.0983, 0.0219], + [0.0133, 0.0596, 0.0983, 0.0596, 0.0133], + [0.0030, 0.0133, 0.0219, 0.0133, 0.0030]]]]) + """ + + gaussian_kernel_x = _gaussian(kernel_size[0], sigma[0], dtype, device) + gaussian_kernel_y = _gaussian(kernel_size[1], sigma[1], dtype, device) + kernel = B.matmul(gaussian_kernel_x.t(), gaussian_kernel_y) # (kernel_size, 1) * (1, kernel_size) + + return kernel.expand(channel, 1, kernel_size[0], kernel_size[1]) + + +def _ssim_update(preds: Tensor, target: Tensor) -> Tuple[Tensor, Tensor]: + """Updates and returns variables required to compute Structural Similarity Index Measure. Checks for same shape + and type of the input tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + """ + + if preds.dtype != target.dtype: + raise TypeError( + "Expected `preds` and `target` to have the same data type." 
+ f" Got preds: {preds.dtype} and target: {target.dtype}." + ) + _check_same_shape(preds, target) + if len(preds.shape) != 4: + raise ValueError( + "Expected `preds` and `target` to have BxCxHxW shape." + f" Got preds: {preds.shape} and target: {target.shape}." + ) + return preds, target + + +def _ssim_compute( + preds: Tensor, + target: Tensor, + kernel_size: Sequence[int] = (11, 11), + sigma: Sequence[float] = (1.5, 1.5), + reduction: str = "elementwise_mean", + data_range: Optional[float] = None, + k1: float = 0.01, + k2: float = 0.03, +) -> Tensor: + """Computes Structual Similarity Index Measure. + + Args: + preds: estimated image + target: ground truth image + kernel_size: size of the gaussian kernel (default: (11, 11)) + sigma: Standard deviation of the gaussian kernel (default: (1.5, 1.5)) + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + + data_range: Range of the image. If ``None``, it is determined from the image (max - min) + k1: Parameter of SSIM. Default: 0.01 + k2: Parameter of SSIM. Default: 0.03 + + Example: + >>> preds = B.rand([16, 1, 16, 16]) + >>> target = preds * 0.75 + >>> preds, target = _ssim_update(preds, target) + >>> _ssim_compute(preds, target) + tensor(0.9219) + """ + if len(kernel_size) != 2 or len(sigma) != 2: + raise ValueError( + "Expected `kernel_size` and `sigma` to have the length of two." + f" Got kernel_size: {len(kernel_size)} and sigma: {len(sigma)}." + ) + + if any(x % 2 == 0 or x <= 0 for x in kernel_size): + raise ValueError(f"Expected `kernel_size` to have odd positive number. Got {kernel_size}.") + + if any(y <= 0 for y in sigma): + raise ValueError(f"Expected `sigma` to have positive number. Got {sigma}.") + + if data_range is None: + data_range = max(preds.max() - preds.min(), target.max() - target.min()) + + c1 = pow(k1 * data_range, 2) + c2 = pow(k2 * data_range, 2) + device = preds.device + + channel = preds.size(1) + dtype = preds.dtype + kernel = _gaussian_kernel(channel, kernel_size, sigma, dtype, device) + pad_h = (kernel_size[0] - 1) // 2 + pad_w = (kernel_size[1] - 1) // 2 + + preds = B.pad(preds, (pad_h, pad_h, pad_w, pad_w), mode="reflect") + target = B.pad(target, (pad_h, pad_h, pad_w, pad_w), mode="reflect") + + input_list = B.cat((preds, target, preds * preds, target * target, preds * target)) # (5 * B, C, H, W) + outputs = B.conv2d(input_list, kernel, groups=channel) + output_list = outputs.split(preds.shape[0]) + + mu_pred_sq = output_list[0].pow(2) + mu_target_sq = output_list[1].pow(2) + mu_pred_target = output_list[0] * output_list[1] + + sigma_pred_sq = output_list[2] - mu_pred_sq + sigma_target_sq = output_list[3] - mu_target_sq + sigma_pred_target = output_list[4] - mu_pred_target + + upper = 2 * sigma_pred_target + c2 + lower = sigma_pred_sq + sigma_target_sq + c2 + + ssim_idx = ((2 * mu_pred_target + c1) * upper) / ((mu_pred_sq + mu_target_sq + c1) * lower) + ssim_idx = ssim_idx[..., pad_h:-pad_h, pad_w:-pad_w] + + return reduce(ssim_idx, reduction) + + +def ssim( + preds: Tensor, + target: Tensor, + kernel_size: Sequence[int] = (11, 11), + sigma: Sequence[float] = (1.5, 1.5), + reduction: str = "elementwise_mean", + data_range: Optional[float] = None, + k1: float = 0.01, + k2: float = 0.03, +) -> Tensor: + """Computes Structual Similarity Index Measure. 
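+
+    The score is computed over Gaussian-weighted local windows as
+    ``((2 * mu_x * mu_y + c1) * (2 * sigma_xy + c2)) / ((mu_x**2 + mu_y**2 + c1) * (sigma_x**2 + sigma_y**2 + c2))``,
+    with ``c1 = (k1 * data_range) ** 2`` and ``c2 = (k2 * data_range) ** 2``, and the result is
+    then reduced according to ``reduction``.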
+ + Args: + preds: estimated image + target: ground truth image + kernel_size: size of the gaussian kernel (default: (11, 11)) + sigma: Standard deviation of the gaussian kernel (default: (1.5, 1.5)) + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + + data_range: Range of the image. If ``None``, it is determined from the image (max - min) + k1: Parameter of SSIM. Default: 0.01 + k2: Parameter of SSIM. Default: 0.03 + + Return: + Tensor with SSIM score + + Raises: + TypeError: + If ``preds`` and ``target`` don't have the same data type. + ValueError: + If ``preds`` and ``target`` don't have ``BxCxHxW shape``. + ValueError: + If the length of ``kernel_size`` or ``sigma`` is not ``2``. + ValueError: + If one of the elements of ``kernel_size`` is not an ``odd positive number``. + ValueError: + If one of the elements of ``sigma`` is not a ``positive number``. + + Example: + >>> from paddlemetrics.functional import ssim + >>> preds = B.rand([16, 1, 16, 16]) + >>> target = preds * 0.75 + >>> ssim(preds, target) + tensor(0.9219) + """ + preds, target = _ssim_update(preds, target) + return _ssim_compute(preds, target, kernel_size, sigma, reduction, data_range, k1, k2) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/pairwise/__init__.py b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/__init__.py new file mode 100644 index 00000000..1d28d0c4 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/__init__.py @@ -0,0 +1,17 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddlemetrics.functional.pairwise.cosine import pairwise_cosine_similarity # noqa: F401 +from paddlemetrics.functional.pairwise.euclidean import pairwise_euclidean_distance # noqa: F401 +from paddlemetrics.functional.pairwise.linear import pairwise_linear_similarity # noqa: F401 +from paddlemetrics.functional.pairwise.manhatten import pairwise_manhatten_distance # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/functional/pairwise/cosine.py b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/cosine.py new file mode 100644 index 00000000..cdd24e15 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/cosine.py @@ -0,0 +1,85 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
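+
+# The pairwise cosine similarity below is the dot product of row-normalised inputs,
+# i.e. sim(x_i, y_j) = <x_i, y_j> / (||x_i|| * ||y_j||). For example, with x_i = [2, 3]
+# and y_j = [1, 0]: 2 / (sqrt(13) * 1) ≈ 0.5547, matching the doctest further down.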
+from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.pairwise.helpers import _check_input, _reduce_distance_matrix + + +def _pairwise_cosine_similarity_update( + x: Tensor, y: Optional[Tensor] = None, zero_diagonal: Optional[bool] = None +) -> Tensor: + """Calculates the pairwise cosine similarity matrix. + + Args: + x: tensor of shape ``[N,d]`` + y: tensor of shape ``[M,d]`` + zero_diagonal: determines if the diagonal of the distance matrix should be set to zero + """ + x, y, zero_diagonal = _check_input(x, y, zero_diagonal) + + norm = B.norm(x, p=2, dim=1) + x /= norm.unsqueeze(1) + norm = B.norm(y, p=2, dim=1) + y /= norm.unsqueeze(1) + + distance = x @ y.T + if zero_diagonal: + distance.fill_diagonal_(0) + return distance + + +def pairwise_cosine_similarity( + x: Tensor, y: Optional[Tensor] = None, reduction: Optional[str] = None, zero_diagonal: Optional[bool] = None +) -> Tensor: + r""" + Calculates pairwise cosine similarity: + + .. math:: + s_{cos}(x,y) = \frac{}{||x|| \cdot ||y||} + = \frac{\sum_{d=1}^D x_d \cdot y_d }{\sqrt{\sum_{d=1}^D x_i^2} \cdot \sqrt{\sum_{d=1}^D x_i^2}} + + If both `x` and `y` are passed in, the calculation will be performed pairwise between the rows of `x` and `y`. + If only `x` is passed in, the calculation will be performed between the rows of `x`. + + Args: + x: Tensor with shape ``[N, d]`` + y: Tensor with shape ``[M, d]``, optional + reduction: reduction to apply along the last dimension. Choose between `'mean'`, `'sum'` + (applied along column dimension) or `'none'`, `None` for no reduction + zero_diagonal: if the diagonal of the distance matrix should be set to 0. If only `x` is given + this defaults to `True` else if `y` is also given it defaults to `False` + + Returns: + A ``[N,N]`` matrix of distances if only ``x`` is given, else a ``[N,M]`` matrix + + Example: + >>> import torchapi as B + >>> from paddlemetrics.functional import pairwise_cosine_similarity + >>> x = B.tensor([[2, 3], [3, 5], [5, 8]], dtype=B.float32) + >>> y = B.tensor([[1, 0], [2, 1]], dtype=B.float32) + >>> pairwise_cosine_similarity(x, y) + tensor([[0.5547, 0.8682], + [0.5145, 0.8437], + [0.5300, 0.8533]]) + >>> pairwise_cosine_similarity(x) + tensor([[0.0000, 0.9989, 0.9996], + [0.9989, 0.0000, 0.9998], + [0.9996, 0.9998, 0.0000]]) + + """ + distance = _pairwise_cosine_similarity_update(x, y, zero_diagonal) + return _reduce_distance_matrix(distance, reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/pairwise/euclidean.py b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/euclidean.py new file mode 100644 index 00000000..fd31cd7f --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/euclidean.py @@ -0,0 +1,79 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
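+
+# The pairwise euclidean distance below uses the expansion
+# ||x_i - y_j||**2 = ||x_i||**2 + ||y_j||**2 - 2 * <x_i, y_j> instead of materialising all
+# pairwise differences. For x_i = [2, 3] and y_j = [1, 0]: 13 + 1 - 4 = 10, and
+# sqrt(10) ≈ 3.1623, matching the doctest further down.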
+from typing import Optional + +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.pairwise.helpers import _check_input, _reduce_distance_matrix + + +def _pairwise_euclidean_distance_update( + x: Tensor, y: Optional[Tensor] = None, zero_diagonal: Optional[bool] = None +) -> Tensor: + """Calculates the pairwise euclidean distance matrix. + + Args: + x: tensor of shape ``[N,d]`` + y: tensor of shape ``[M,d]`` + zero_diagonal: determines if the diagonal of the distance matrix should be set to zero + """ + x, y, zero_diagonal = _check_input(x, y, zero_diagonal) + x_norm = x.norm(dim=1, keepdim=True) + y_norm = y.norm(dim=1).T + distance = x_norm * x_norm + y_norm * y_norm - 2 * x.mm(y.T) + if zero_diagonal: + distance.fill_diagonal_(0) + return distance.sqrt() + + +def pairwise_euclidean_distance( + x: Tensor, y: Optional[Tensor] = None, reduction: Optional[str] = None, zero_diagonal: Optional[bool] = None +) -> Tensor: + r""" + Calculates pairwise euclidean distances: + + .. math:: + d_{euc}(x,y) = ||x - y||_2 = \sqrt{\sum_{d=1}^D (x_d - y_d)^2} + + If both `x` and `y` are passed in, the calculation will be performed pairwise between the rows of `x` and `y`. + If only `x` is passed in, the calculation will be performed between the rows of `x`. + + Args: + x: Tensor with shape ``[N, d]`` + y: Tensor with shape ``[M, d]``, optional + reduction: reduction to apply along the last dimension. Choose between `'mean'`, `'sum'` + (applied along column dimension) or `'none'`, `None` for no reduction + zero_diagonal: if the diagonal of the distance matrix should be set to 0. If only `x` is given + this defaults to `True` else if `y` is also given it defaults to `False` + + Returns: + A ``[N,N]`` matrix of distances if only ``x`` is given, else a ``[N,M]`` matrix + + Example: + >>> import torchapi as B + >>> from paddlemetrics.functional import pairwise_euclidean_distance + >>> x = B.tensor([[2, 3], [3, 5], [5, 8]], dtype=B.float32) + >>> y = B.tensor([[1, 0], [2, 1]], dtype=B.float32) + >>> pairwise_euclidean_distance(x, y) + tensor([[3.1623, 2.0000], + [5.3852, 4.1231], + [8.9443, 7.6158]]) + >>> pairwise_euclidean_distance(x) + tensor([[0.0000, 2.2361, 5.8310], + [2.2361, 0.0000, 3.6056], + [5.8310, 3.6056, 0.0000]]) + + """ + distance = _pairwise_euclidean_distance_update(x, y, zero_diagonal) + return _reduce_distance_matrix(distance, reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/pairwise/helpers.py b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/helpers.py new file mode 100644 index 00000000..2d38916a --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/helpers.py @@ -0,0 +1,59 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
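+
+# Shared input handling for the pairwise metrics: when only ``x`` is given, ``y`` becomes
+# ``x.clone()`` and ``zero_diagonal`` defaults to True (the self-similarity entries are
+# zeroed); when ``y`` is also given, ``zero_diagonal`` defaults to False.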
+from typing import Optional, Tuple + +from paddleext.torchapi import Tensor + + +def _check_input( + x: Tensor, y: Optional[Tensor] = None, zero_diagonal: Optional[bool] = None +) -> Tuple[Tensor, Tensor, bool]: + """Check that input has the right dimensionality and sets the zero_diagonal argument if user has not provided + import module. + + Args: + x: tensor of shape ``[N,d]`` + y: if provided, a tensor of shape ``[M,d]`` + zero_diagonal: determines if the diagonal of the distance matrix should be set to zero + """ + if x.ndim != 2: + raise ValueError(f"Expected argument `x` to be a 2D tensor of shape `[N, d]` but got {x.shape}") + + if y is not None: + if y.ndim != 2 or y.shape[1] != x.shape[1]: + raise ValueError( + "Expected argument `y` to be a 2D tensor of shape `[M, d]` where" + " `d` should be same as the last dimension of `x`" + ) + zero_diagonal = False if zero_diagonal is None else zero_diagonal + else: + y = x.clone() + zero_diagonal = True if zero_diagonal is None else zero_diagonal + return x, y, zero_diagonal + + +def _reduce_distance_matrix(distmat: Tensor, reduction: Optional[str] = None) -> Tensor: + """Final reduction of distance matrix. + + Args: + distance: a ``[N,M]`` matrix + reduction: string determining how to reduce along last dimension + """ + if reduction == "mean": + return distmat.mean(dim=-1) + if reduction == "sum": + return distmat.sum(dim=-1) + if reduction is None or reduction == "none": + return distmat + raise ValueError(f"Expected reduction to be one of `['mean', 'sum', None]` but got {reduction}") diff --git a/RE/paddlemetric/src/paddlemetrics/functional/pairwise/linear.py b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/linear.py new file mode 100644 index 00000000..08e79301 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/linear.py @@ -0,0 +1,78 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.pairwise.helpers import _check_input, _reduce_distance_matrix + + +def _pairwise_linear_similarity_update( + x: Tensor, y: Optional[Tensor] = None, zero_diagonal: Optional[bool] = None +) -> Tensor: + """Calculates the pairwise linear similarity matrix. + + Args: + x: tensor of shape ``[N,d]`` + y: tensor of shape ``[M,d]`` + zero_diagonal: determines if the diagonal of the distance matrix should be set to zero + """ + x, y, zero_diagonal = _check_input(x, y, zero_diagonal) + + distance = x @ y.T + if zero_diagonal: + distance.fill_diagonal_(0) + return distance + + +def pairwise_linear_similarity( + x: Tensor, y: Optional[Tensor] = None, reduction: Optional[str] = None, zero_diagonal: Optional[bool] = None +) -> Tensor: + r""" + Calculates pairwise linear similarity: + + .. math:: + s_{lin}(x,y) = = \sum_{d=1}^D x_d \cdot y_d + + If both `x` and `y` are passed in, the calculation will be performed pairwise between the rows of `x` and `y`. 
+ If only `x` is passed in, the calculation will be performed between the rows of `x`. + + Args: + x: Tensor with shape ``[N, d]`` + y: Tensor with shape ``[M, d]``, optional + reduction: reduction to apply along the last dimension. Choose between `'mean'`, `'sum'` + (applied along column dimension) or `'none'`, `None` for no reduction + zero_diagonal: if the diagonal of the distance matrix should be set to 0. If only `x` is given + this defaults to `True` else if `y` is also given it defaults to `False` + + Returns: + A ``[N,N]`` matrix of distances if only ``x`` is given, else a ``[N,M]`` matrix + + Example: + >>> import torchapi as B + >>> from paddlemetrics.functional import pairwise_linear_similarity + >>> x = B.tensor([[2, 3], [3, 5], [5, 8]], dtype=B.float32) + >>> y = B.tensor([[1, 0], [2, 1]], dtype=B.float32) + >>> pairwise_linear_similarity(x, y) + tensor([[ 2., 7.], + [ 3., 11.], + [ 5., 18.]]) + >>> pairwise_linear_similarity(x) + tensor([[ 0., 21., 34.], + [21., 0., 55.], + [34., 55., 0.]]) + + """ + distance = _pairwise_linear_similarity_update(x, y, zero_diagonal) + return _reduce_distance_matrix(distance, reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/pairwise/manhatten.py b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/manhatten.py new file mode 100644 index 00000000..d0079bd6 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/manhatten.py @@ -0,0 +1,78 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.pairwise.helpers import _check_input, _reduce_distance_matrix + + +def _pairwise_manhatten_distance_update( + x: Tensor, y: Optional[Tensor] = None, zero_diagonal: Optional[bool] = None +) -> Tensor: + """Calculates the pairwise manhatten similarity matrix. + + Args: + x: tensor of shape ``[N,d]`` + y: if provided, a tensor of shape ``[M,d]`` + zero_diagonal: determines if the diagonal of the distance matrix should be set to zero + """ + x, y, zero_diagonal = _check_input(x, y, zero_diagonal) + + distance = (x.unsqueeze(1) - y.unsqueeze(0).repeat(x.shape[0], 1, 1)).abs().sum(dim=-1) + if zero_diagonal: + distance.fill_diagonal_(0) + return distance + + +def pairwise_manhatten_distance( + x: Tensor, y: Optional[Tensor] = None, reduction: Optional[str] = None, zero_diagonal: Optional[bool] = None +) -> Tensor: + r""" + Calculates pairwise manhatten distance: + + .. math:: + d_{man}(x,y) = ||x-y||_1 = \sum_{d=1}^D |x_d - y_d| + + If both `x` and `y` are passed in, the calculation will be performed pairwise between the rows of `x` and `y`. + If only `x` is passed in, the calculation will be performed between the rows of `x`. + + Args: + x: Tensor with shape ``[N, d]`` + y: Tensor with shape ``[M, d]``, optional + reduction: reduction to apply along the last dimension. 
Choose between `'mean'`, `'sum'` + (applied along column dimension) or `'none'`, `None` for no reduction + zero_diagonal: if the diagonal of the distance matrix should be set to 0. If only `x` is given + this defaults to `True` else if `y` is also given it defaults to `False` + + Returns: + A ``[N,N]`` matrix of distances if only ``x`` is given, else a ``[N,M]`` matrix + + Example: + >>> import torchapi as B + >>> from paddlemetrics.functional import pairwise_manhatten_distance + >>> x = B.tensor([[2, 3], [3, 5], [5, 8]], dtype=B.float32) + >>> y = B.tensor([[1, 0], [2, 1]], dtype=B.float32) + >>> pairwise_manhatten_distance(x, y) + tensor([[ 4., 2.], + [ 7., 5.], + [12., 10.]]) + >>> pairwise_manhatten_distance(x) + tensor([[0., 3., 8.], + [3., 0., 5.], + [8., 5., 0.]]) + + """ + distance = _pairwise_manhatten_distance_update(x, y, zero_diagonal) + return _reduce_distance_matrix(distance, reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/__init__.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/__init__.py new file mode 100644 index 00000000..7ddc6040 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/__init__.py @@ -0,0 +1,27 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddlemetrics.functional.image.psnr import psnr # noqa: F401 +from paddlemetrics.functional.image.ssim import ssim # noqa: F401 +from paddlemetrics.functional.regression.cosine_similarity import cosine_similarity # noqa: F401 +from paddlemetrics.functional.regression.explained_variance import explained_variance # noqa: F401 +from paddlemetrics.functional.regression.mean_absolute_error import mean_absolute_error # noqa: F401 +from paddlemetrics.functional.regression.mean_absolute_percentage_error import ( # noqa: F401 + mean_absolute_percentage_error, +) +from paddlemetrics.functional.regression.mean_squared_error import mean_squared_error # noqa: F401 +from paddlemetrics.functional.regression.mean_squared_log_error import mean_squared_log_error # noqa: F401 +from paddlemetrics.functional.regression.pearson import pearson_corrcoef # noqa: F401 +from paddlemetrics.functional.regression.r2 import r2_score # noqa: F401 +from paddlemetrics.functional.regression.spearman import spearman_corrcoef # noqa: F401 +from paddlemetrics.functional.regression.tweedie_deviance import tweedie_deviance_score # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/cosine_similarity.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/cosine_similarity.py new file mode 100644 index 00000000..ea0f77a3 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/cosine_similarity.py @@ -0,0 +1,98 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _cosine_similarity_update( + preds: Tensor, + target: Tensor, +) -> Tuple[Tensor, Tensor]: + """Updates and returns variables required to compute Cosine Similarity. Checks for same shape of input tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + """ + + _check_same_shape(preds, target) + preds = preds.float() + target = target.float() + + return preds, target + + +def _cosine_similarity_compute(preds: Tensor, target: Tensor, reduction: str = "sum") -> Tensor: + """Computes Cosine Similarity. + + Args: + preds: Predicted tensor + target: Ground truth tensor + reduction: + The method of reducing along the batch dimension using sum, mean or taking the individual scores + + Example: + >>> target = B.tensor([[1, 2, 3, 4], [1, 2, 3, 4]]) + >>> preds = B.tensor([[1, 2, 3, 4], [-1, -2, -3, -4]]) + >>> preds, target = _cosine_similarity_update(preds, target) + >>> _cosine_similarity_compute(preds, target, 'none') + tensor([ 1.0000, -1.0000]) + """ + + dot_product = (preds * target).sum(dim=-1) + preds_norm = preds.norm(dim=-1) + target_norm = target.norm(dim=-1) + similarity = dot_product / (preds_norm * target_norm) + reduction_mapping = { + "sum": B.sum, + "mean": B.mean, + "none": lambda x: x, + None: lambda x: x, + } + return reduction_mapping[reduction](similarity) + + +def cosine_similarity(preds: Tensor, target: Tensor, reduction: str = "sum") -> Tensor: + r""" + Computes the `Cosine Similarity`_ + between targets and predictions: + + .. math:: + cos_{sim}(x,y) = \frac{x \cdot y}{||x|| \cdot ||y||} = + \frac{\sum_{i=1}^n x_i y_i}{\sqrt{\sum_{i=1}^n x_i^2}\sqrt{\sum_{i=1}^n y_i^2}} + + where :math:`y` is a tensor of target values, and :math:`x` is a tensor of predictions. + + Args: + preds: Predicted tensor with shape ``(N,d)`` + target: Ground truth tensor with shape ``(N,d)`` + reduction: + The method of reducing along the batch dimension using sum, mean or taking the individual scores + + Example: + >>> from paddlemetrics.functional.regression import cosine_similarity + >>> target = B.tensor([[1, 2, 3, 4], + ... [1, 2, 3, 4]]) + >>> preds = B.tensor([[1, 2, 3, 4], + ... [-1, -2, -3, -4]]) + >>> cosine_similarity(preds, target, 'none') + tensor([ 1.0000, -1.0000]) + + """ + preds, target = _cosine_similarity_update(preds, target) + return _cosine_similarity_compute(preds, target, reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/explained_variance.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/explained_variance.py new file mode 100644 index 00000000..95ef6acf --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/explained_variance.py @@ -0,0 +1,137 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Sequence, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _explained_variance_update(preds: Tensor, target: Tensor) -> Tuple[int, Tensor, Tensor, Tensor, Tensor]: + """Updates and returns variables required to compute Explained Variance. Checks for same shape of input + tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + """ + + _check_same_shape(preds, target) + + n_obs = preds.size(0) + sum_error = B.sum(target - preds, dim=0) + diff = target - preds + sum_squared_error = B.sum(diff * diff, dim=0) + + sum_target = B.sum(target, dim=0) + sum_squared_target = B.sum(target * target, dim=0) + + return n_obs, sum_error, sum_squared_error, sum_target, sum_squared_target + + +def _explained_variance_compute( + n_obs: Tensor, + sum_error: Tensor, + sum_squared_error: Tensor, + sum_target: Tensor, + sum_squared_target: Tensor, + multioutput: str = "uniform_average", +) -> Tensor: + """Computes Explained Variance. + + Args: + n_obs: Number of predictions or observations + sum_error: Sum of errors over all observations + sum_squared_error: Sum of square of errors over all observations + sum_target: Sum of target values + sum_squared_target: Sum of squares of target values + multioutput: Defines aggregation in the case of multiple output scores. Can be one + of the following strings (default is `'uniform_average'`.): + + * `'raw_values'` returns full set of scores + * `'uniform_average'` scores are uniformly averaged + * `'variance_weighted'` scores are weighted by their individual variances + + Example: + >>> target = B.tensor([[0.5, 1], [-1, 1], [7, -6]]) + >>> preds = B.tensor([[0, 2], [-1, 2], [8, -5]]) + >>> n_obs, sum_error, ss_error, sum_target, ss_target = _explained_variance_update(preds, target) + >>> _explained_variance_compute(n_obs, sum_error, ss_error, sum_target, ss_target, multioutput='raw_values') + tensor([0.9677, 1.0000]) + """ + + diff_avg = sum_error / n_obs + numerator = sum_squared_error / n_obs - (diff_avg * diff_avg) + + target_avg = sum_target / n_obs + denominator = sum_squared_target / n_obs - (target_avg * target_avg) + + # Take care of division by zero + nonzero_numerator = numerator != 0 + nonzero_denominator = denominator != 0 + valid_score = nonzero_numerator & nonzero_denominator + output_scores = B.ones_like(diff_avg) + output_scores[valid_score] = 1.0 - (numerator[valid_score] / denominator[valid_score]) + output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0 + + # Decide what to do in multioutput case + # Todo: allow user to pass in tensor with weights + if multioutput == "raw_values": + return output_scores + if multioutput == "uniform_average": + return B.mean(output_scores) + if multioutput == "variance_weighted": + denom_sum = B.sum(denominator) + return B.sum(denominator / denom_sum * output_scores) + + +def explained_variance( + preds: Tensor, + target: Tensor, + multioutput: str = "uniform_average", +) -> Union[Tensor, Sequence[Tensor]]: + """Computes explained variance. 
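+
+    The score is ``1 - Var(target - preds) / Var(target)``, computed per output column and
+    then aggregated according to ``multioutput``; a score of 1 means the residuals carry no
+    variance relative to the targets.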
+ + Args: + preds: estimated labels + target: ground truth labels + multioutput: Defines aggregation in the case of multiple output scores. Can be one + of the following strings (default is `'uniform_average'`.): + + * `'raw_values'` returns full set of scores + * `'uniform_average'` scores are uniformly averaged + * `'variance_weighted'` scores are weighted by their individual variances + + Example: + >>> from paddlemetrics.functional import explained_variance + >>> target = B.tensor([3, -0.5, 2, 7]) + >>> preds = B.tensor([2.5, 0.0, 2, 8]) + >>> explained_variance(preds, target) + tensor(0.9572) + + >>> target = B.tensor([[0.5, 1], [-1, 1], [7, -6]]) + >>> preds = B.tensor([[0, 2], [-1, 2], [8, -5]]) + >>> explained_variance(preds, target, multioutput='raw_values') + tensor([0.9677, 1.0000]) + """ + n_obs, sum_error, sum_squared_error, sum_target, sum_squared_target = _explained_variance_update(preds, target) + return _explained_variance_compute( + n_obs, + sum_error, + sum_squared_error, + sum_target, + sum_squared_target, + multioutput, + ) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_absolute_error.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_absolute_error.py new file mode 100644 index 00000000..1ddb4153 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_absolute_error.py @@ -0,0 +1,73 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _mean_absolute_error_update(preds: Tensor, target: Tensor) -> Tuple[Tensor, int]: + """Updates and returns variables required to compute Mean Absolute Error. Checks for same shape of input + tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + """ + + _check_same_shape(preds, target) + sum_abs_error = B.sum(B.abs(preds - target)) + n_obs = target.numel() + return sum_abs_error, n_obs + + +def _mean_absolute_error_compute(sum_abs_error: Tensor, n_obs: int) -> Tensor: + """Computes Mean Absolute Error. + + Args: + sum_abs_error: Sum of absolute value of errors over all observations + n_obs: Number of predictions or observations + + Example: + >>> preds = B.tensor([0., 1, 2, 3]) + >>> target = B.tensor([0., 1, 2, 2]) + >>> sum_abs_error, n_obs = _mean_absolute_error_update(preds, target) + >>> _mean_absolute_error_compute(sum_abs_error, n_obs) + tensor(0.2500) + """ + + return sum_abs_error / n_obs + + +def mean_absolute_error(preds: Tensor, target: Tensor) -> Tensor: + """Computes mean absolute error. 
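+
+    ``MAE = sum(|preds - target|) / n``; in the example below only the last element differs
+    (|3 - 2| = 1 over 4 observations), giving 0.25.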
+ + Args: + preds: estimated labels + target: ground truth labels + + Return: + Tensor with MAE + + Example: + >>> from paddlemetrics.functional import mean_absolute_error + >>> x = B.tensor([0., 1, 2, 3]) + >>> y = B.tensor([0., 1, 2, 2]) + >>> mean_absolute_error(x, y) + tensor(0.2500) + """ + sum_abs_error, n_obs = _mean_absolute_error_update(preds, target) + return _mean_absolute_error_compute(sum_abs_error, n_obs) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_absolute_percentage_error.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_absolute_percentage_error.py new file mode 100644 index 00000000..862617c0 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_absolute_percentage_error.py @@ -0,0 +1,91 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _mean_absolute_percentage_error_update( + preds: Tensor, + target: Tensor, + epsilon: float = 1.17e-06, +) -> Tuple[Tensor, int]: + """Updates and returns variables required to compute Mean Percentage Error. Checks for same shape of input + tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + epsilon: Specifies the lower bound for target values. Any target value below epsilon + is set to epsilon (avoids ZeroDivisionError). default: 1.17e-06 + """ + + _check_same_shape(preds, target) + + abs_diff = B.abs(preds - target) + abs_per_error = abs_diff / B.clamp(B.abs(target), min=epsilon) + + sum_abs_per_error = B.sum(abs_per_error) + + num_obs = target.numel() + + return sum_abs_per_error, num_obs + + +def _mean_absolute_percentage_error_compute(sum_abs_per_error: Tensor, num_obs: int) -> Tensor: + """Computes Mean Absolute Percentage Error. + + Args: + sum_abs_per_error: Sum of absolute value of percentage errors over all observations + (percentage error = (target - prediction) / target) + num_obs: Number of predictions or observations + + Example: + >>> target = B.tensor([1, 10, 1e6]) + >>> preds = B.tensor([0.9, 15, 1.2e6]) + >>> sum_abs_per_error, num_obs = _mean_absolute_percentage_error_update(preds, target) + >>> _mean_absolute_percentage_error_compute(sum_abs_per_error, num_obs) + tensor(0.2667) + """ + + return sum_abs_per_error / num_obs + + +def mean_absolute_percentage_error(preds: Tensor, target: Tensor) -> Tensor: + """Computes mean absolute percentage error. + + Args: + preds: estimated labels + target: ground truth labels + + Return: + Tensor with MAPE + + Note: + The epsilon value is taken from `scikit-learn's implementation of MAPE`_. 
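+
+    For the example below the per-element percentage errors are 0.1 / 1, 5 / 10 and
+    0.2e6 / 1e6, i.e. 0.1 + 0.5 + 0.2 = 0.8, which averaged over 3 observations gives
+    roughly 0.2667.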
+ + Example: + >>> from paddlemetrics.functional import mean_absolute_percentage_error + >>> target = B.tensor([1, 10, 1e6]) + >>> preds = B.tensor([0.9, 15, 1.2e6]) + >>> mean_absolute_percentage_error(preds, target) + tensor(0.2667) + """ + sum_abs_per_error, num_obs = _mean_absolute_percentage_error_update(preds, target) + mean_ape = _mean_absolute_percentage_error_compute(sum_abs_per_error, num_obs) + + return mean_ape diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_squared_error.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_squared_error.py new file mode 100644 index 00000000..58af5d21 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_squared_error.py @@ -0,0 +1,74 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _mean_squared_error_update(preds: Tensor, target: Tensor) -> Tuple[Tensor, int]: + """Updates and returns variables required to compute Mean Squared Error. Checks for same shape of input + tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + """ + _check_same_shape(preds, target) + diff = preds - target + sum_squared_error = B.sum(diff * diff) + n_obs = target.numel() + return sum_squared_error, n_obs + + +def _mean_squared_error_compute(sum_squared_error: Tensor, n_obs: int, squared: bool = True) -> Tensor: + """Computes Mean Squared Error. + + Args: + sum_squared_error: Sum of square of errors over all observations + n_obs: Number of predictions or observations + squared: Returns RMSE value if set to False. default: True + + Example: + >>> preds = B.tensor([0., 1, 2, 3]) + >>> target = B.tensor([0., 1, 2, 2]) + >>> sum_squared_error, n_obs = _mean_squared_error_update(preds, target) + >>> _mean_squared_error_compute(sum_squared_error, n_obs) + tensor(0.2500) + """ + return sum_squared_error / n_obs if squared else B.sqrt(sum_squared_error / n_obs) + + +def mean_squared_error(preds: Tensor, target: Tensor, squared: bool = True) -> Tensor: + """Computes mean squared error. 
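+
+    ``MSE = sum((preds - target) ** 2) / n`` (its square root is returned when
+    ``squared=False``); in the example below only the last element differs, giving 1 / 4 = 0.25.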
+ + Args: + preds: estimated labels + target: ground truth labels + squared: returns RMSE value if set to False + + Return: + Tensor with MSE + + Example: + >>> from paddlemetrics.functional import mean_squared_error + >>> x = B.tensor([0., 1, 2, 3]) + >>> y = B.tensor([0., 1, 2, 2]) + >>> mean_squared_error(x, y) + tensor(0.2500) + """ + sum_squared_error, n_obs = _mean_squared_error_update(preds, target) + return _mean_squared_error_compute(sum_squared_error, n_obs, squared=squared) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_squared_log_error.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_squared_log_error.py new file mode 100644 index 00000000..7270ffc0 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_squared_log_error.py @@ -0,0 +1,76 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _mean_squared_log_error_update(preds: Tensor, target: Tensor) -> Tuple[Tensor, int]: + """Returns variables required to compute Mean Squared Log Error. Checks for same shape of tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + """ + + _check_same_shape(preds, target) + sum_squared_log_error = B.sum(B.pow(B.log1p(preds) - B.log1p(target), 2)) + n_obs = target.numel() + return sum_squared_log_error, n_obs + + +def _mean_squared_log_error_compute(sum_squared_log_error: Tensor, n_obs: int) -> Tensor: + """Computes Mean Squared Log Error. + + Args: + sum_squared_log_error: Sum of square of log errors over all observations + (log error = log(target) - log(prediction)) + n_obs: Number of predictions or observations + + Example: + >>> preds = B.tensor([0., 1, 2, 3]) + >>> target = B.tensor([0., 1, 2, 2]) + >>> sum_squared_log_error, n_obs = _mean_squared_log_error_update(preds, target) + >>> _mean_squared_log_error_compute(sum_squared_log_error, n_obs) + tensor(0.0207) + """ + + return sum_squared_log_error / n_obs + + +def mean_squared_log_error(preds: Tensor, target: Tensor) -> Tensor: + """Computes mean squared log error. + + Args: + preds: estimated labels + target: ground truth labels + + Return: + Tensor with RMSLE + + Example: + >>> from paddlemetrics.functional import mean_squared_log_error + >>> x = B.tensor([0., 1, 2, 3]) + >>> y = B.tensor([0., 1, 2, 2]) + >>> mean_squared_log_error(x, y) + tensor(0.0207) + + .. 
note:: + Half precision is only support on GPU for this metric + """ + sum_squared_log_error, n_obs = _mean_squared_log_error_update(preds, target) + return _mean_squared_log_error_compute(sum_squared_log_error, n_obs) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/pearson.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/pearson.py new file mode 100644 index 00000000..e1f7dd82 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/pearson.py @@ -0,0 +1,102 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _pearson_corrcoef_update( + preds: Tensor, + target: Tensor, + mean_x: Tensor, + mean_y: Tensor, + var_x: Tensor, + var_y: Tensor, + corr_xy: Tensor, + n_prior: Tensor, +) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: + """Updates and returns variables required to compute Pearson Correlation Coefficient. Checks for same shape of + input tensors. + + Args: + mean_x: current mean estimate of x tensor + mean_y: current mean estimate of y tensor + var_x: current variance estimate of x tensor + var_y: current variance estimate of y tensor + corr_xy: current covariance estimate between x and y tensor + n_prior: current number of observed observations + """ + # Data checking + _check_same_shape(preds, target) + preds = preds.squeeze() + target = target.squeeze() + if preds.ndim > 1 or target.ndim > 1: + raise ValueError("Expected both predictions and target to be 1 dimensional tensors.") + + n_obs = preds.numel() + mx_new = (n_prior * mean_x + preds.mean() * n_obs) / (n_prior + n_obs) + my_new = (n_prior * mean_y + target.mean() * n_obs) / (n_prior + n_obs) + n_prior += n_obs + var_x += ((preds - mx_new) * (preds - mean_x)).sum() + var_y += ((target - my_new) * (target - mean_y)).sum() + corr_xy += ((preds - mx_new) * (target - mean_y)).sum() + mean_x = mx_new + mean_y = my_new + + return mean_x, mean_y, var_x, var_y, corr_xy, n_prior + + +def _pearson_corrcoef_compute( + var_x: Tensor, + var_y: Tensor, + corr_xy: Tensor, + nb: Tensor, +) -> Tensor: + """Computes the final pearson correlation based on accumulated statistics. + + Args: + var_x: variance estimate of x tensor + var_y: variance estimate of y tensor + corr_xy: covariance estimate between x and y tensor + nb: number of observations + """ + var_x /= nb - 1 + var_y /= nb - 1 + corr_xy /= nb - 1 + corrcoef = (corr_xy / (var_x * var_y).sqrt()).squeeze() + return B.clamp(corrcoef, -1.0, 1.0) + + +def pearson_corrcoef(preds: Tensor, target: Tensor) -> Tensor: + """Computes pearson correlation coefficient. 
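+
+    The means, variances and covariance are accumulated in a streaming fashion, and the final
+    coefficient is ``cov(preds, target) / (std(preds) * std(target))``, clamped to [-1, 1].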
+ + Args: + preds: estimated scores + target: ground truth scores + + Example: + >>> from paddlemetrics.functional import pearson_corrcoef + >>> target = B.tensor([3, -0.5, 2, 7]) + >>> preds = B.tensor([2.5, 0.0, 2, 8]) + >>> pearson_corrcoef(preds, target) + tensor(0.9849) + """ + _temp = B.zeros(1, dtype=preds.dtype, device=preds.device) + mean_x, mean_y, var_x = _temp.clone(), _temp.clone(), _temp.clone() + var_y, corr_xy, nb = _temp.clone(), _temp.clone(), _temp.clone() + _, _, var_x, var_y, corr_xy, nb = _pearson_corrcoef_update(preds, target, mean_x, mean_y, var_x, var_y, corr_xy, nb) + return _pearson_corrcoef_compute(var_x, var_y, corr_xy, nb) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/r2.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/r2.py new file mode 100644 index 00000000..a8321912 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/r2.py @@ -0,0 +1,173 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities import rank_zero_warn +from paddlemetrics.utilities.checks import _check_same_shape + + +def _r2_score_update(preds: Tensor, target: Tensor) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + """Updates and returns variables required to compute R2 score. Checks for same shape and 1D/2D input tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + """ + + _check_same_shape(preds, target) + if preds.ndim > 2: + raise ValueError( + "Expected both prediction and target to be 1D or 2D tensors," + f" but received tensors with dimension {preds.shape}" + ) + + sum_obs = B.sum(target, dim=0) + sum_squared_obs = B.sum(target * target, dim=0) + residual = target - preds + rss = B.sum(residual * residual, dim=0) + n_obs = target.size(0) + + return sum_squared_obs, sum_obs, rss, n_obs + + +def _r2_score_compute( + sum_squared_obs: Tensor, + sum_obs: Tensor, + rss: Tensor, + n_obs: Tensor, + adjusted: int = 0, + multioutput: str = "uniform_average", +) -> Tensor: + """Computes R2 score. + + Args: + sum_squared_obs: Sum of square of all observations + sum_obs: Sum of all observations + rss: Residual sum of squares + n_obs: Number of predictions or observations + adjusted: number of independent regressors for calculating adjusted r2 score. + Default 0 (standard r2 score). + multioutput: Defines aggregation in the case of multiple output scores. 
Can be one + of the following strings (default is `'uniform_average'`.): + + * `'raw_values'` returns full set of scores + * `'uniform_average'` scores are uniformly averaged + * `'variance_weighted'` scores are weighted by their individual variances + + Example: + >>> target = B.tensor([[0.5, 1], [-1, 1], [7, -6]]) + >>> preds = B.tensor([[0, 2], [-1, 2], [8, -5]]) + >>> sum_squared_obs, sum_obs, rss, n_obs = _r2_score_update(preds, target) + >>> _r2_score_compute(sum_squared_obs, sum_obs, rss, n_obs, multioutput="raw_values") + tensor([0.9654, 0.9082]) + """ + if n_obs < 2: + raise ValueError("Needs at least two samples to calculate r2 score.") + + mean_obs = sum_obs / n_obs + tss = sum_squared_obs - sum_obs * mean_obs + raw_scores = 1 - (rss / tss) + + if multioutput == "raw_values": + r2 = raw_scores + elif multioutput == "uniform_average": + r2 = B.mean(raw_scores) + elif multioutput == "variance_weighted": + tss_sum = B.sum(tss) + r2 = B.sum(tss / tss_sum * raw_scores) + else: + raise ValueError( + "Argument `multioutput` must be either `raw_values`," + f" `uniform_average` or `variance_weighted`. Received {multioutput}." + ) + + if adjusted < 0 or not isinstance(adjusted, int): + raise ValueError("`adjusted` parameter should be an integer larger or" " equal to 0.") + + if adjusted != 0: + if adjusted > n_obs - 1: + rank_zero_warn( + "More independent regressions than data points in" + " adjusted r2 score. Falls back to standard r2 score.", + UserWarning, + ) + elif adjusted == n_obs - 1: + rank_zero_warn("Division by zero in adjusted r2 score. Falls back to" " standard r2 score.", UserWarning) + else: + r2 = 1 - (1 - r2) * (n_obs - 1) / (n_obs - adjusted - 1) + return r2 + + +def r2_score( + preds: Tensor, + target: Tensor, + adjusted: int = 0, + multioutput: str = "uniform_average", +) -> Tensor: + r""" + Computes r2 score also known as `R2 Score_Coefficient Determination`_: + + .. math:: R^2 = 1 - \frac{SS_{res}}{SS_{tot}} + + where :math:`SS_{res}=\sum_i (y_i - f(x_i))^2` is the sum of residual squares, and + :math:`SS_{tot}=\sum_i (y_i - \bar{y})^2` is total sum of squares. Can also calculate + adjusted r2 score given by + + .. math:: R^2_{adj} = 1 - \frac{(1-R^2)(n-1)}{n-k-1} + + where the parameter :math:`k` (the number of independent regressors) should + be provided as the ``adjusted`` argument. + + Args: + preds: estimated labels + target: ground truth labels + adjusted: number of independent regressors for calculating adjusted r2 score. + Default 0 (standard r2 score). + multioutput: Defines aggregation in the case of multiple output scores. Can be one + of the following strings (default is ``'uniform_average'``.): + + * ``'raw_values'`` returns full set of scores + * ``'uniform_average'`` scores are uniformly averaged + * ``'variance_weighted'`` scores are weighted by their individual variances + + Raises: + ValueError: + If both ``preds`` and ``targets`` are not ``1D`` or ``2D`` tensors. + ValueError: + If ``len(preds)`` is less than ``2`` + since at least ``2`` sampels are needed to calculate r2 score. + ValueError: + If ``multioutput`` is not one of ``raw_values``, + ``uniform_average`` or ``variance_weighted``. + ValueError: + If ``adjusted`` is not an ``integer`` greater than ``0``. 
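+
+    For the first example below: ``SS_res = 0.25 + 0.25 + 0 + 1 = 1.5`` and, with a target
+    mean of 2.875, ``SS_tot = 29.1875``, so ``R^2 = 1 - 1.5 / 29.1875 ≈ 0.9486``.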
+ + Example: + >>> from paddlemetrics.functional import r2_score + >>> target = B.tensor([3, -0.5, 2, 7]) + >>> preds = B.tensor([2.5, 0.0, 2, 8]) + >>> r2_score(preds, target) + tensor(0.9486) + + >>> target = B.tensor([[0.5, 1], [-1, 1], [7, -6]]) + >>> preds = B.tensor([[0, 2], [-1, 2], [8, -5]]) + >>> r2_score(preds, target, multioutput='raw_values') + tensor([0.9654, 0.9082]) + + """ + sum_squared_obs, sum_obs, rss, n_obs = _r2_score_update(preds, target) + return _r2_score_compute(sum_squared_obs, sum_obs, rss, n_obs, adjusted, multioutput) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/spearman.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/spearman.py new file mode 100644 index 00000000..62f7a9d4 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/spearman.py @@ -0,0 +1,129 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _find_repeats(data: Tensor) -> Tensor: + """find and return values which have repeats i.e. the same value are more than once in the tensor.""" + temp = data.detach().clone() + temp = temp.sort()[0] + + change = B.cat([B.tensor([True], device=temp.device), temp[1:] != temp[:-1]]) + unique = temp[change] + change_idx = B.cat([B.nonzero(change), B.tensor([[temp.numel()]], device=temp.device)]).flatten() + freq = change_idx[1:] - change_idx[:-1] + atleast2 = freq > 1 + return unique[atleast2] + + +def _rank_data(data: Tensor) -> Tensor: + """Calculate the rank for each element of a tensor. The rank refers to the indices of an element in the + corresponding sorted tensor (starting from 1). Duplicates of the same value will be assigned the mean of their + rank. + + Adopted from: `Rank of element tensor`_ + """ + n = data.numel() + rank = B.empty_like(data) + idx = data.argsort() + rank[idx[:n]] = B.arange(1, n + 1, dtype=data.dtype, device=data.device) + + repeats = _find_repeats(data) + for r in repeats: + condition = data == r + rank[condition] = rank[condition].mean() + return rank + + +def _spearman_corrcoef_update(preds: Tensor, target: Tensor) -> Tuple[Tensor, Tensor]: + """Updates and returns variables required to compute Spearman Correlation Coefficient. Checks for same shape + and type of input tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + """ + + if preds.dtype != target.dtype: + raise TypeError( + "Expected `preds` and `target` to have the same data type." + f" Got preds: {preds.dtype} and target: {target.dtype}." 
+ ) + _check_same_shape(preds, target) + preds = preds.squeeze() + target = target.squeeze() + if preds.ndim > 1 or target.ndim > 1: + raise ValueError("Expected both predictions and target to be 1 dimensional tensors.") + return preds, target + + +def _spearman_corrcoef_compute(preds: Tensor, target: Tensor, eps: float = 1e-6) -> Tensor: + """Computes Spearman Correlation Coefficient. + + Args: + preds: Predicted tensor + target: Ground truth tensor + eps: Avoids ZeroDivisionError. default: 1e-6 + + Example: + >>> target = B.tensor([3, -0.5, 2, 7]) + >>> preds = B.tensor([2.5, 0.0, 2, 8]) + >>> preds, target = _spearman_corrcoef_update(preds, target) + >>> _spearman_corrcoef_compute(preds, target) + tensor(1.0000) + """ + + preds = _rank_data(preds) + target = _rank_data(target) + + preds_diff = preds - preds.mean() + target_diff = target - target.mean() + + cov = (preds_diff * target_diff).mean() + preds_std = B.sqrt((preds_diff * preds_diff).mean()) + target_std = B.sqrt((target_diff * target_diff).mean()) + + corrcoef = cov / (preds_std * target_std + eps) + return B.clamp(corrcoef, -1.0, 1.0) + + +def spearman_corrcoef(preds: Tensor, target: Tensor) -> Tensor: + r""" + Computes `spearmans rank correlation coefficient`_: + + .. math: + r_s = = \frac{cov(rg_x, rg_y)}{\sigma_{rg_x} * \sigma_{rg_y}} + + where :math:`rg_x` and :math:`rg_y` are the rank associated to the variables x and y. Spearmans correlations + coefficient corresponds to the standard pearsons correlation coefficient calculated on the rank variables. + + Args: + preds: estimated scores + target: ground truth scores + + Example: + >>> from paddlemetrics.functional import spearman_corrcoef + >>> target = B.tensor([3, -0.5, 2, 7]) + >>> preds = B.tensor([2.5, 0.0, 2, 8]) + >>> spearman_corrcoef(preds, target) + tensor(1.0000) + + """ + preds, target = _spearman_corrcoef_update(preds, target) + return _spearman_corrcoef_compute(preds, target) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/symmetric_mean_absolute_percentage_error.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/symmetric_mean_absolute_percentage_error.py new file mode 100644 index 00000000..89eadf9e --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/symmetric_mean_absolute_percentage_error.py @@ -0,0 +1,99 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _symmetric_mean_absolute_percentage_error_update( + preds: Tensor, + target: Tensor, + epsilon: float = 1.17e-06, +) -> Tuple[Tensor, int]: + """Updates and returns variables required to compute Symmetric Mean Absolute Percentage Error. Checks for same + shape of input tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + epsilon: Avoids ZeroDivisionError. 
default: 1.17e-06 + """ + + _check_same_shape(preds, target) + + abs_diff = B.abs(preds - target) + abs_per_error = abs_diff / B.clamp(B.abs(target) + B.abs(preds), min=epsilon) + + sum_abs_per_error = 2 * B.sum(abs_per_error) + + num_obs = target.numel() + + return sum_abs_per_error, num_obs + + +def _symmetric_mean_absolute_percentage_error_compute(sum_abs_per_error: Tensor, num_obs: int) -> Tensor: + """Computes Symmetric Mean Absolute Percentage Error. + + Args: + sum_abs_per_error: Sum of values of symmetric absolute percentage errors over all observations + (symmetric absolute percentage error = 2 * |target - prediction| / (target + prediction)) + num_obs: Number of predictions or observations + + Example: + >>> target = B.tensor([1, 10, 1e6]) + >>> preds = B.tensor([0.9, 15, 1.2e6]) + >>> sum_abs_per_error, num_obs = _symmetric_mean_absolute_percentage_error_update(preds, target) + >>> _symmetric_mean_absolute_percentage_error_compute(sum_abs_per_error, num_obs) + tensor(0.2290) + """ + + return sum_abs_per_error / num_obs + + +def symmetric_mean_absolute_percentage_error(preds: Tensor, target: Tensor) -> Tensor: + r""" + Computes symmetric mean absolute percentage error (SMAPE_): + + .. math:: \text{SMAPE} = \frac{2}{n}\sum_1^n\frac{max(| y_i - \hat{y_i} |}{| y_i | + | \hat{y_i} |, \epsilon)} + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a tensor of predictions. + + Args: + preds: estimated labels + target: ground truth labels + + Return: + Tensor with SMAPE. + + Example: + >>> from paddlemetrics.functional import symmetric_mean_absolute_percentage_error + >>> target = B.tensor([1, 10, 1e6]) + >>> preds = B.tensor([0.9, 15, 1.2e6]) + >>> symmetric_mean_absolute_percentage_error(preds, target) + tensor(0.2290) + + """ + sum_abs_per_error, num_obs = _symmetric_mean_absolute_percentage_error_update( + preds, + target, + ) + mean_ape = _symmetric_mean_absolute_percentage_error_compute( + sum_abs_per_error, + num_obs, + ) + + return mean_ape diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/tweedie_deviance.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/tweedie_deviance.py new file mode 100644 index 00000000..7cb366a2 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/tweedie_deviance.py @@ -0,0 +1,139 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _tweedie_deviance_score_update(preds: Tensor, targets: Tensor, power: float = 0.0) -> Tuple[Tensor, Tensor]: + """Updates and returns variables required to compute Deviance Score for the given power. Checks for same shape + of input tensors. 
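+    The per-element deviance is evaluated in closed form for ``power`` equal to 0 (squared error),
+    1 (Poisson) and 2 (Gamma), and with the general Tweedie formula otherwise.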
+ + Args: + preds: Predicted tensor + targets: Ground truth tensor + power: see :func:`tweedie_deviance_score` + + Example: + >>> targets = B.tensor([1.0, 2.0, 3.0, 4.0]) + >>> preds = B.tensor([4.0, 3.0, 2.0, 1.0]) + >>> _tweedie_deviance_score_update(preds, targets, power=2) + (tensor(4.8333), tensor(4)) + """ + _check_same_shape(preds, targets) + + zero_tensor = B.zeros(preds.shape, device=preds.device) + + if 0 < power < 1: + raise ValueError(f"Deviance Score is not defined for power={power}.") + + if power == 0: + deviance_score = B.pow(targets - preds, exponent=2) + elif power == 1: + # Poisson distribution + if B.any(preds <= 0) or B.any(targets < 0): + raise ValueError( + f"For power={power}, 'preds' has to be strictly positive and 'targets' cannot be negative." + ) + + deviance_score = 2 * (targets * B.log(targets / preds) + preds - targets) + elif power == 2: + # Gamma distribution + if B.any(preds <= 0) or B.any(targets <= 0): + raise ValueError(f"For power={power}, both 'preds' and 'targets' have to be strictly positive.") + + deviance_score = 2 * (B.log(preds / targets) + (targets / preds) - 1) + else: + if power < 0: + if B.any(preds <= 0): + raise ValueError(f"For power={power}, 'preds' has to be strictly positive.") + elif 1 < power < 2: + if B.any(preds <= 0) or B.any(targets < 0): + raise ValueError( + f"For power={power}, 'targets' has to be strictly positive and 'preds' cannot be negative." + ) + else: + if B.any(preds <= 0) or B.any(targets <= 0): + raise ValueError(f"For power={power}, both 'preds' and 'targets' have to be strictly positive.") + + term_1 = B.pow(B.max(targets, zero_tensor), 2 - power) / ((1 - power) * (2 - power)) + term_2 = targets * B.pow(preds, 1 - power) / (1 - power) + term_3 = B.pow(preds, 2 - power) / (2 - power) + deviance_score = 2 * (term_1 - term_2 + term_3) + + sum_deviance_score = B.sum(deviance_score) + num_observations = B.tensor(B.numel(deviance_score), device=preds.device) + + return sum_deviance_score, num_observations + + +def _tweedie_deviance_score_compute(sum_deviance_score: Tensor, num_observations: Tensor) -> Tensor: + """Computes Deviance Score. + + Args: + sum_deviance_score: Sum of deviance scores accumalated until now. + num_observations: Number of observations encountered until now. + + Example: + >>> targets = B.tensor([1.0, 2.0, 3.0, 4.0]) + >>> preds = B.tensor([4.0, 3.0, 2.0, 1.0]) + >>> sum_deviance_score, num_observations = _tweedie_deviance_score_update(preds, targets, power=2) + >>> _tweedie_deviance_score_compute(sum_deviance_score, num_observations) + tensor(1.2083) + """ + + return sum_deviance_score / num_observations + + +def tweedie_deviance_score(preds: Tensor, targets: Tensor, power: float = 0.0) -> Tensor: + r""" + Computes the `Tweedie Deviance Score`_ between targets and predictions: + + .. math:: + deviance\_score(\hat{y},y) = + \begin{cases} + (\hat{y} - y)^2, & \text{for }power=0\\ + 2 * (y * log(\frac{y}{\hat{y}}) + \hat{y} - y), & \text{for }power=1\\ + 2 * (log(\frac{\hat{y}}{y}) + \frac{y}{\hat{y}} - 1), & \text{for }power=2\\ + 2 * (\frac{(max(y,0))^{2}}{(1 - power)(2 - power)} - \frac{y(\hat{y})^{1 - power}}{1 - power} + \frac{(\hat{y}) + ^{2 - power}}{2 - power}), & \text{otherwise} + \end{cases} + + where :math:`y` is a tensor of targets values, and :math:`\hat{y}` is a tensor of predictions. + + Args: + preds: Predicted tensor with shape ``(N,...)`` + targets: Ground truth tensor with shape ``(N,...)`` + power: + - power < 0 : Extreme stable distribution. (Requires: preds > 0.) 
+ - power = 0 : Normal distribution. (Requires: targets and preds can be any real numbers.) + - power = 1 : Poisson distribution. (Requires: targets >= 0 and y_pred > 0.) + - 1 < p < 2 : Compound Poisson distribution. (Requires: targets >= 0 and preds > 0.) + - power = 2 : Gamma distribution. (Requires: targets > 0 and preds > 0.) + - power = 3 : Inverse Gaussian distribution. (Requires: targets > 0 and preds > 0.) + - otherwise : Positive stable distribution. (Requires: targets > 0 and preds > 0.) + + Example: + >>> from paddlemetrics.functional import tweedie_deviance_score + >>> targets = B.tensor([1.0, 2.0, 3.0, 4.0]) + >>> preds = B.tensor([4.0, 3.0, 2.0, 1.0]) + >>> tweedie_deviance_score(preds, targets, power=2) + tensor(1.2083) + + """ + sum_deviance_score, num_observations = _tweedie_deviance_score_update(preds, targets, power=power) + return _tweedie_deviance_score_compute(sum_deviance_score, num_observations) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/retrieval/__init__.py b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/__init__.py new file mode 100644 index 00000000..d05abb6a --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/__init__.py @@ -0,0 +1,22 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddlemetrics.functional.retrieval.average_precision import retrieval_average_precision # noqa: F401 +from paddlemetrics.functional.retrieval.fall_out import retrieval_fall_out # noqa: F401 +from paddlemetrics.functional.retrieval.hit_rate import retrieval_hit_rate # noqa: F401 +from paddlemetrics.functional.retrieval.ndcg import retrieval_normalized_dcg # noqa: F401 +from paddlemetrics.functional.retrieval.precision import retrieval_precision # noqa: F401 +from paddlemetrics.functional.retrieval.r_precision import retrieval_r_precision # noqa: F401 +from paddlemetrics.functional.retrieval.recall import retrieval_recall # noqa: F401 +from paddlemetrics.functional.retrieval.reciprocal_rank import retrieval_reciprocal_rank # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/functional/retrieval/average_precision.py b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/average_precision.py new file mode 100644 index 00000000..0b067a89 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/average_precision.py @@ -0,0 +1,49 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
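+# The function below computes average precision over the ranking induced by ``preds``.
+# Worked example (the values match the doctest in ``retrieval_average_precision``):
+#
+#     preds  = [0.2, 0.3, 0.5]        # predicted relevance scores
+#     target = [True, False, True]    # ground-truth relevance
+#
+# Sorting by score gives targets [True, False, True], so the relevant documents sit at
+# ranks 1 and 3; precision at those ranks is 1/1 and 2/3, and AP = (1/1 + 2/3) / 2 = 0.8333.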
+import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.checks import _check_retrieval_functional_inputs + + +def retrieval_average_precision(preds: Tensor, target: Tensor) -> Tensor: + """Computes average precision (for information retrieval), as explained in `IR Average precision`_. + + ``preds`` and ``target`` should be of the same shape and live on the same device. If no ``target`` is ``True``, + ``0`` is returned. ``target`` must be either `bool` or `integers` and ``preds`` must be `float`, + otherwise an error is raised. + + Args: + preds: estimated probabilities of each document to be relevant. + target: ground truth about each document being relevant or not. + + Return: + a single-value tensor with the average precision (AP) of the predictions ``preds`` w.r.t. the labels ``target``. + + Example: + >>> from paddlemetrics.functional import retrieval_average_precision + >>> preds = tensor([0.2, 0.3, 0.5]) + >>> target = tensor([True, False, True]) + >>> retrieval_average_precision(preds, target) + tensor(0.8333) + """ + preds, target = _check_retrieval_functional_inputs(preds, target) + + if not target.sum(): + return tensor(0.0, device=preds.device) + + target = target[B.argsort(preds, dim=-1, descending=True)] + positions = B.arange(1, len(target) + 1, device=target.device, dtype=B.float32)[target > 0] + res = B.div((B.arange(len(positions), device=positions.device, dtype=B.float32) + 1), positions).mean() + return res diff --git a/RE/paddlemetric/src/paddlemetrics/functional/retrieval/fall_out.py b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/fall_out.py new file mode 100644 index 00000000..10c5762b --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/fall_out.py @@ -0,0 +1,62 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.checks import _check_retrieval_functional_inputs + + +def retrieval_fall_out(preds: Tensor, target: Tensor, k: Optional[int] = None) -> Tensor: + """Computes the Fall-out (for information retrieval), as explained in `IR Fall-out`_ Fall-out is the fraction + of non-relevant documents retrieved among all the non-relevant documents. + + ``preds`` and ``target`` should be of the same shape and live on the same device. If no ``target`` is ``True``, + ``0`` is returned. ``target`` must be either `bool` or `integers` and ``preds`` must be `float`, + otherwise an error is raised. If you want to measure Fall-out@K, ``k`` must be a positive integer. + + Args: + preds: estimated probabilities of each document to be relevant. + target: ground truth about each document being relevant or not. + k: consider only the top k elements (default: None, which considers them all) + + Returns: + a single-value tensor with the fall-out (at ``k``) of the predictions ``preds`` w.r.t. the labels ``target``. 
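+        The score is the number of non-relevant documents among the top ``k`` (after sorting by
+        ``preds`` in descending order) divided by the total number of non-relevant documents.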
+ + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> from paddlemetrics.functional import retrieval_fall_out + >>> preds = tensor([0.2, 0.3, 0.5]) + >>> target = tensor([True, False, True]) + >>> retrieval_fall_out(preds, target, k=2) + tensor(1.) + """ + preds, target = _check_retrieval_functional_inputs(preds, target) + + k = preds.shape[-1] if k is None else k + + if not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + + target = 1 - target + + if not target.sum(): + return tensor(0.0, device=preds.device) + + relevant = target[B.argsort(preds, dim=-1, descending=True)][:k].sum().float() + return relevant / target.sum() diff --git a/RE/paddlemetric/src/paddlemetrics/functional/retrieval/hit_rate.py b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/hit_rate.py new file mode 100644 index 00000000..83336a50 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/hit_rate.py @@ -0,0 +1,57 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.checks import _check_retrieval_functional_inputs + + +def retrieval_hit_rate(preds: Tensor, target: Tensor, k: Optional[int] = None) -> Tensor: + """Computes the hit rate (for information retrieval). The hit rate is 1.0 if there is at least one relevant + document among all the top `k` retrieved documents. + + ``preds`` and ``target`` should be of the same shape and live on the same device. If no ``target`` is ``True``, + ``0`` is returned. ``target`` must be either `bool` or `integers` and ``preds`` must be `float`, + otherwise an error is raised. If you want to measure HitRate@K, ``k`` must be a positive integer. + + Args: + preds: estimated probabilities of each document to be relevant. + target: ground truth about each document being relevant or not. + k: consider only the top k elements (default: None, which considers them all) + + Returns: + a single-value tensor with the hit rate (at ``k``) of the predictions ``preds`` w.r.t. the labels ``target``. + + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> preds = tensor([0.2, 0.3, 0.5]) + >>> target = tensor([True, False, True]) + >>> retrieval_hit_rate(preds, target, k=2) + tensor(1.) 
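+        In this example the two highest-scoring documents are the ones scored 0.5 and 0.3;
+        the former is relevant, so the hit rate at ``k=2`` is 1.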
+ """ + preds, target = _check_retrieval_functional_inputs(preds, target) + + if k is None: + k = preds.shape[-1] + + if not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + + relevant = target[B.argsort(preds, dim=-1, descending=True)][:k].sum() + return (relevant > 0).float() diff --git a/RE/paddlemetric/src/paddlemetrics/functional/retrieval/ndcg.py b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/ndcg.py new file mode 100644 index 00000000..73fedad5 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/ndcg.py @@ -0,0 +1,72 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_retrieval_functional_inputs + + +def _dcg(target: Tensor) -> Tensor: + """Computes Discounted Cumulative Gain for input tensor.""" + denom = B.log2(B.arange(target.shape[-1], device=target.device) + 2.0) + return (target / denom).sum(dim=-1) + + +def retrieval_normalized_dcg(preds: Tensor, target: Tensor, k: Optional[int] = None) -> Tensor: + """Computes `Normalized Discounted Cumulative Gain`_ (for information retrieval). + + ``preds`` and ``target`` should be of the same shape and live on the same device. + ``target`` must be either `bool` or `integers` and ``preds`` must be `float`, + otherwise an error is raised. + + Args: + preds: estimated probabilities of each document to be relevant. + target: ground truth about each document relevance. + k: consider only the top k elements (default: None, which considers them all) + + Return: + a single-value tensor with the nDCG of the predictions ``preds`` w.r.t. the labels ``target``. 
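+        The value is the DCG of the top ``k`` documents ranked by ``preds`` divided by the DCG of
+        the ideal ranking (documents sorted by true relevance); if the ideal DCG is zero, the score is 0.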
+ + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> from paddlemetrics.functional import retrieval_normalized_dcg + >>> preds = B.tensor([.1, .2, .3, 4, 70]) + >>> target = B.tensor([10, 0, 0, 1, 5]) + >>> retrieval_normalized_dcg(preds, target) + tensor(0.6957) + """ + preds, target = _check_retrieval_functional_inputs(preds, target, allow_non_binary_target=True) + + k = preds.shape[-1] if k is None else k + + if not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + + sorted_target = target[B.argsort(preds, dim=-1, descending=True)][:k] + ideal_target = B.sort(target, descending=True)[0][:k] + + ideal_dcg = _dcg(ideal_target) + target_dcg = _dcg(sorted_target) + + # filter undefined scores + all_irrelevant = ideal_dcg == 0 + target_dcg[all_irrelevant] = 0 + target_dcg[~all_irrelevant] /= ideal_dcg[~all_irrelevant] + + return target_dcg.mean() diff --git a/RE/paddlemetric/src/paddlemetrics/functional/retrieval/precision.py b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/precision.py new file mode 100644 index 00000000..83bd1172 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/precision.py @@ -0,0 +1,60 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.checks import _check_retrieval_functional_inputs + + +def retrieval_precision(preds: Tensor, target: Tensor, k: Optional[int] = None) -> Tensor: + """Computes the precision metric (for information retrieval). Precision is the fraction of relevant documents + among all the retrieved documents. + + ``preds`` and ``target`` should be of the same shape and live on the same device. If no ``target`` is ``True``, + ``0`` is returned. ``target`` must be either `bool` or `integers` and ``preds`` must be `float`, + otherwise an error is raised. If you want to measure Precision@K, ``k`` must be a positive integer. + + Args: + preds: estimated probabilities of each document to be relevant. + target: ground truth about each document being relevant or not. + k: consider only the top k elements (default: None, which considers them all) + + Returns: + a single-value tensor with the precision (at ``k``) of the predictions ``preds`` w.r.t. the labels ``target``. 
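+        The denominator is ``k`` itself, i.e. Precision@k counts the relevant documents among the
+        top ``k`` ranked by ``preds`` and divides that count by ``k``.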
+ + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> preds = tensor([0.2, 0.3, 0.5]) + >>> target = tensor([True, False, True]) + >>> retrieval_precision(preds, target, k=2) + tensor(0.5000) + """ + preds, target = _check_retrieval_functional_inputs(preds, target) + + if k is None: + k = preds.shape[-1] + + if not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + + if not target.sum(): + return tensor(0.0, device=preds.device) + + relevant = target[B.argsort(preds, dim=-1, descending=True)][:k].sum().float() + return relevant / k diff --git a/RE/paddlemetric/src/paddlemetrics/functional/retrieval/r_precision.py b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/r_precision.py new file mode 100644 index 00000000..d26e32f8 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/r_precision.py @@ -0,0 +1,49 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.checks import _check_retrieval_functional_inputs + + +def retrieval_r_precision(preds: Tensor, target: Tensor) -> Tensor: + """Computes the r-precision metric (for information retrieval). R-Precision is the fraction of relevant + documents among all the top ``k`` retrieved documents where ``k`` is equal to the total number of relevant + documents. + + ``preds`` and ``target`` should be of the same shape and live on the same device. If no ``target`` is ``True``, + ``0`` is returned. ``target`` must be either `bool` or `integers` and ``preds`` must be `float`, + otherwise an error is raised. If you want to measure Precision@K, ``k`` must be a positive integer. + + Args: + preds: estimated probabilities of each document to be relevant. + target: ground truth about each document being relevant or not. + + Returns: + a single-value tensor with the r-precision of the predictions ``preds`` w.r.t. the labels ``target``. + + Example: + >>> preds = tensor([0.2, 0.3, 0.5]) + >>> target = tensor([True, False, True]) + >>> retrieval_r_precision(preds, target) + tensor(0.5000) + """ + preds, target = _check_retrieval_functional_inputs(preds, target) + + relevant_number = target.sum() + if not relevant_number: + return tensor(0.0, device=preds.device) + + relevant = target[B.argsort(preds, dim=-1, descending=True)][:relevant_number].sum().float() + return relevant / relevant_number diff --git a/RE/paddlemetric/src/paddlemetrics/functional/retrieval/recall.py b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/recall.py new file mode 100644 index 00000000..e00d450c --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/recall.py @@ -0,0 +1,61 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.checks import _check_retrieval_functional_inputs + + +def retrieval_recall(preds: Tensor, target: Tensor, k: Optional[int] = None) -> Tensor: + """Computes the recall metric (for information retrieval). Recall is the fraction of relevant documents + retrieved among all the relevant documents. + + ``preds`` and ``target`` should be of the same shape and live on the same device. If no ``target`` is ``True``, + ``0`` is returned. ``target`` must be either `bool` or `integers` and ``preds`` must be `float`, + otherwise an error is raised. If you want to measure Recall@K, ``k`` must be a positive integer. + + Args: + preds: estimated probabilities of each document to be relevant. + target: ground truth about each document being relevant or not. + k: consider only the top k elements (default: None, which considers them all) + + Returns: + a single-value tensor with the recall (at ``k``) of the predictions ``preds`` w.r.t. the labels ``target``. + + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> from paddlemetrics.functional import retrieval_recall + >>> preds = tensor([0.2, 0.3, 0.5]) + >>> target = tensor([True, False, True]) + >>> retrieval_recall(preds, target, k=2) + tensor(0.5000) + """ + preds, target = _check_retrieval_functional_inputs(preds, target) + + if k is None: + k = preds.shape[-1] + + if not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + + if not target.sum(): + return tensor(0.0, device=preds.device) + + relevant = target[B.argsort(preds, dim=-1, descending=True)][:k].sum().float() + return relevant / target.sum() diff --git a/RE/paddlemetric/src/paddlemetrics/functional/retrieval/reciprocal_rank.py b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/reciprocal_rank.py new file mode 100644 index 00000000..c92c223e --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/reciprocal_rank.py @@ -0,0 +1,49 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.checks import _check_retrieval_functional_inputs + + +def retrieval_reciprocal_rank(preds: Tensor, target: Tensor) -> Tensor: + """Computes reciprocal rank (for information retrieval). See `Mean Reciprocal Rank`_ + + ``preds`` and ``target`` should be of the same shape and live on the same device. 
If no ``target`` is ``True``, + 0 is returned. ``target`` must be either `bool` or `integers` and ``preds`` must be `float`, + otherwise an error is raised. + + Args: + preds: estimated probabilities of each document to be relevant. + target: ground truth about each document being relevant or not. + + Return: + a single-value tensor with the reciprocal rank (RR) of the predictions ``preds`` wrt the labels ``target``. + + Example: + >>> from paddlemetrics.functional import retrieval_reciprocal_rank + >>> preds = B.tensor([0.2, 0.3, 0.5]) + >>> target = B.tensor([False, True, False]) + >>> retrieval_reciprocal_rank(preds, target) + tensor(0.5000) + """ + preds, target = _check_retrieval_functional_inputs(preds, target) + + if not target.sum(): + return tensor(0.0, device=preds.device) + + target = target[B.argsort(preds, dim=-1, descending=True)] + position = B.nonzero(target).view(-1) + res = 1.0 / (position[0] + 1.0) + return res diff --git a/RE/paddlemetric/src/paddlemetrics/functional/self_supervised.py b/RE/paddlemetric/src/paddlemetrics/functional/self_supervised.py new file mode 100644 index 00000000..9af407aa --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/self_supervised.py @@ -0,0 +1,57 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from warnings import warn + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.pairwise import pairwise_cosine_similarity, pairwise_linear_similarity + + +def embedding_similarity( + batch: Tensor, similarity: str = "cosine", reduction: str = "none", zero_diagonal: bool = True +) -> Tensor: + """Computes representation similarity. + + Example: + >>> from paddlemetrics.functional import embedding_similarity + >>> embeddings = B.tensor([[1., 2., 3., 4.], [1., 2., 3., 4.], [4., 5., 6., 7.]]) + >>> embedding_similarity(embeddings) + tensor([[0.0000, 1.0000, 0.9759], + [1.0000, 0.0000, 0.9759], + [0.9759, 0.9759, 0.0000]]) + + Args: + batch: (batch, dim) + similarity: 'dot' or 'cosine' + reduction: 'none', 'sum', 'mean' (all along dim -1) + zero_diagonal: if True, the diagonals are set to zero + + Return: + A square matrix (batch, batch) with the similarity scores between all elements + If sum or mean are used, then returns (b, 1) with the reduced value for each row + + .. deprecated:: v0.6 + Use :func:`paddlemetrics.functional.pairwise_cosine_similarity` when `similarity='cosine'` + else use :func:`paddlemetrics.functional.pairwise_euclidean_distance`. Will be removed in v0.7. + """ + warn( + "Function `embedding_similarity` was deprecated v0.6 and will be removed in v0.7." 
+ " Use `paddlemetrics.functional.pairwise_cosine_similarity` instead when argument" + " similarity='cosine' else use `paddlemetrics.functional.pairwise_linear_similarity", + DeprecationWarning, + ) + if similarity == "cosine": + return pairwise_cosine_similarity(batch, reduction=reduction, zero_diagonal=zero_diagonal) + return pairwise_linear_similarity(batch, reduction=reduction, zero_diagonal=zero_diagonal) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/text/__init__.py b/RE/paddlemetric/src/paddlemetrics/functional/text/__init__.py new file mode 100644 index 00000000..97170840 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/text/__init__.py @@ -0,0 +1,17 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddlemetrics.functional.text.bleu import bleu_score # noqa: F401 +from paddlemetrics.functional.text.sacre_bleu import sacre_bleu_score # noqa: F401 +from paddlemetrics.functional.text.wer import wer # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/functional/text/bert.py b/RE/paddlemetric/src/paddlemetrics/functional/text/bert.py new file mode 100644 index 00000000..168be6ee --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/text/bert.py @@ -0,0 +1,650 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import csv +import math +import urllib +import warnings +from collections import Counter, defaultdict +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, nn +from paddleext.torchapi.data import DataLoader, Dataset + +from paddlemetrics.utilities.imports import _TQDM_AVAILABLE, _TRANSFORMERS_AVAILABLE + +if _TRANSFORMERS_AVAILABLE: + from transformers import AutoModel, AutoTokenizer + +if _TQDM_AVAILABLE: + import tqdm + + +def _preprocess_text( + text: List[str], + tokenizer: Any, + max_length: int = 512, + truncation: bool = True, + sort_according_length: bool = True, + own_tokenizer: bool = False, +) -> Dict[str, Tensor]: + """Default text pre-processing function using `transformers` `AutoTokenizer` instance. + + Args: + text: + An iterable of sentences. + tokenizer: + Either `AutoTokenizer` instance from `transformers` package, or a user's own tokenizer. + max_length: + A maximum sequence length. + truncation: + An indication of whether tokenized sequences should be padded only to the length of the longest sequence. 
+ sort_according_length: + An indication of whether tokenized sequences should be sorted from shortest to longest. This is appropriate + to do for leveraging dynamic padding during embedding calculation and thereby to hasten inference. + own_tokenizer: + An indication of whether a non-default user's own tokenizer is used. + + Return: + A dictionary of tokenized sentences including input_ids and attention_mask. + + Raises: + BaseException: + If a tokenization with a user's own tokenizer is not successful. + """ + if not own_tokenizer: + tokenized_data = tokenizer( + text, padding="max_length", max_length=max_length, truncation=truncation, return_tensors="pt" + ) + else: + try: + tokenized_data = tokenizer(text, max_length) + except BaseException as e: + raise BaseException(f"Tokenization was not successful: {e}") + + input_ids, attention_mask = ( + _sort_data_according_length(tokenized_data["input_ids"], tokenized_data["attention_mask"]) + if sort_according_length + else (tokenized_data["input_ids"], tokenized_data["attention_mask"]) + ) + return {"input_ids": input_ids, "attention_mask": attention_mask} + + +def _process_attention_mask_for_special_tokens(attention_mask: Tensor) -> Tensor: + """Process attention mask to be zero for special [CLS] and [SEP] tokens as they're not included in a + calculation for BERT score. + + Args: + attention_mask: An attention mask to be returned, for example, by a `transformers` tokenizer. + + Return: + A processed attention mask. + """ + # Make attention_mask zero for [CLS] token + attention_mask[:, 0] = 0 + # Make attention_mask zero for [SEP] token + sep_token_position = (attention_mask - 0.1).cumsum(-1).argmax(-1) + attention_mask[B.arange(attention_mask.size(0)).long(), sep_token_position] = 0 + return attention_mask + + +def _sort_data_according_length(input_ids: Tensor, attention_mask: Tensor) -> Tuple[Tensor, Tensor]: + """Sort tokenized sentence from the shortest to the longest one.""" + sorted_indices = attention_mask.sum(1).argsort() + input_ids = input_ids[sorted_indices] + attention_mask = attention_mask[sorted_indices] + return input_ids, attention_mask + + +def _input_data_collator( + batch: Dict[str, Tensor], device: Optional[Union[str, B.device]] = None +) -> Dict[str, Tensor]: + """Helper function that trims model inputs to the longest sequence within the batch and put the input on the + proper device.""" + max_len = int(batch["attention_mask"].sum(1).max().item()) + input_ids = batch["input_ids"][:, :max_len].to(device) + attention_mask = batch["attention_mask"][:, :max_len].to(device) + batch.update({"input_ids": input_ids, "attention_mask": attention_mask}) + return batch + + +def _output_data_collator(model_output: Tensor, attention_mask: Tensor, target_len: int) -> Tuple[Tensor, Tensor]: + """Helper function that pads the model output and attention mask to the target length.""" + zeros_shape = list(model_output.shape) + zeros_shape[2] = target_len - zeros_shape[2] + model_output = B.cat( + [model_output, B.zeros(zeros_shape, dtype=model_output.dtype).to(model_output.device)], dim=2 + ) + zeros = B.zeros(zeros_shape[0], zeros_shape[2], dtype=attention_mask.dtype).to(attention_mask.device) + attention_mask = B.cat([attention_mask, zeros], dim=1) + return model_output, attention_mask + + +class TextDataset(Dataset): + """PyTorch dataset class for storing tokenized sentences and other properties used for BERT score + calculation.""" + + def __init__( + self, + text: List[str], + tokenizer: Any, + max_length: int = 512, + 
preprocess_text_fn: Callable[[List[str], Any, int], Dict[str, Tensor]] = _preprocess_text, + idf: bool = False, + tokens_idf: Optional[Dict[int, float]] = None, + ) -> None: + """ + Args: + text: + An iterable of sentences. + tokenizer: + `AutoTokenizer` instance from `transformers` package. + max_length: + A maximum sequence length. + preprocess_text_fn: + A function used for processing the input sentences. + idf: + An indication of whether calculate token inverse document frequencies to weight the model embeddings. + tokens_idf: + Inverse document frequencies (these should be calculated on reference sentences). + """ + self.text = preprocess_text_fn(text, tokenizer, max_length) + self.max_length = self.text["input_ids"].shape[1] + self.num_sentences = len(text) + self.idf = idf + self.tokens_idf = {} + if idf: + self.tokens_idf = tokens_idf if tokens_idf is not None else self._get_tokens_idf() + + def __getitem__(self, idx: int) -> Dict[str, Tensor]: + input_ids = self.text["input_ids"][idx, :] + attention_mask = self.text["attention_mask"][idx, :] + inputs_dict = {"input_ids": input_ids, "attention_mask": attention_mask} + if self.idf: + input_ids_idf = B.tensor([self.tokens_idf[input_idx] for input_idx in input_ids.tolist()]) + inputs_dict["input_ids_idf"] = input_ids_idf + return inputs_dict + + def __len__(self) -> int: + return self.num_sentences + + def _get_tokens_idf(self) -> Dict[int, float]: + """Calculate token inverse document frequences. + + Return: + A python dictionary containing inverse document frequences for token ids. + """ + token_counter: Counter = Counter() + for tokens in map(self._set_of_tokens, self.text["input_ids"]): + token_counter.update(tokens) + + tokens_idf: Dict[int, float] = defaultdict(self._get_tokens_idf_default_value) + tokens_idf.update( + {idx: math.log((self.num_sentences + 1) / (occurrence + 1)) for idx, occurrence in token_counter.items()} + ) + return tokens_idf + + def _get_tokens_idf_default_value(self) -> float: + """Helper function that ensures `defaultdict` to be pickled.""" + return math.log((self.num_sentences + 1) / 1) + + @staticmethod + def _set_of_tokens(input_ids: Tensor) -> Set: + """Return set of tokens from the `input_ids` `B.Tensor`.""" + return set(input_ids.tolist()) + + +class TokenizedDataset(TextDataset): + """The child class of `TextDataset` class used with already tokenized data.""" + + def __init__( + self, + input_ids: Tensor, + attention_mask: Tensor, + idf: bool = False, + tokens_idf: Optional[Dict[int, float]] = None, + ) -> None: + """ + Args: + input_ids: + Input ids (`B.Tensor`). + attention_mask: + Attention mask (`B.Tensor`). + idf: + An indication of whether calculate token inverse document frequencies to weight the model embeddings. + tokens_idf: + Inverse document frequencies (these should be calculated on reference sentences). 
+ """ + self.text = dict(zip(["input_ids", "attention_mask"], _sort_data_according_length(input_ids, attention_mask))) + self.text = _input_data_collator(self.text) + self.num_sentences = len(self.text["input_ids"]) + self.max_length = self.text["input_ids"].shape[1] + self.idf = idf + self.tokens_idf = {} + if idf: + self.tokens_idf = tokens_idf if tokens_idf is not None else self._get_tokens_idf() + + +def _get_progress_bar(dataloader: DataLoader, verbose: bool = False) -> Union[DataLoader, "tqdm.auto.tqdm"]: + """Helper function returning either the dataloader itself when `verbose = False`, or it wraps the dataloader with + `tqdm.auto.tqdm`, when `verbose = True` to display a progress bar during the embbeddings calculation.""" + return tqdm.auto.tqdm(dataloader) if verbose else dataloader + + +def _check_shape_of_model_output(output: Tensor, input_ids: Tensor) -> None: + """Check if the shape of the user's own model output.""" + bs, seq_len = input_ids.shape[:2] + invalid_out_shape = len(output.shape) != 3 or output.shape[0] != bs or output.shape[1] != seq_len + if invalid_out_shape: + raise ValueError( + "The model output must be `B.Tensor` of a shape `[batch_size, seq_len, model_dim]` " + f"i.e. [{bs}, {seq_len}. , `model_dim`], but got {output.shape}." + ) + + +def _get_embeddings_and_idf_scale( + dataloader: DataLoader, + target_len: int, + model: nn.Module, + device: Optional[Union[str, B.device]] = None, + num_layers: Optional[int] = None, + all_layers: bool = False, + idf: bool = False, + verbose: bool = False, + user_forward_fn: Callable[[nn.Module, Dict[str, Tensor]], Tensor] = None, +) -> Tuple[Tensor, Tensor]: + """Calculate sentence embeddings and the inverse-document-frequence scaling factor. + Args: + dataloader: + `B.utils.data.DataLoader` instance. + target_len: + A length of the longest sequence in the data. Used for padding the model output. + model: + BERT model. + device: + A device to be used for calculation. + num_layers: + The layer of representation to use. + all_layers: + An indication whether representation from all model layers should be used for BERTScore. + idf: + An Indication whether normalization using inverse document frequencies should be used. + verbose: + An indication of whether a progress bar to be displayed during the embeddings calculation. + user_forward_fn: + A user's own forward function used in a combination with `user_model`. This function must take `user_model` + and a python dictionary of containing `"input_ids"` and `"attention_mask"` represented by `B.Tensor` + as an input and return the model's output represented by the single `B.Tensor`. + + Return: + A tuple of B.Tensors containing the model's embeddings and the normalized tokens IDF. + When `idf = False`, tokens IDF is not calculated, and a matrix of mean weights is returned instead. + For a single sentence, `mean_weight = 1/seq_len`, where `seq_len` is a sum over the corresponding + `attention_mask`. + + Raises: + ValueError: + If `all_layers = True` and a model, which is not from the `transformers` package, is used. 
+ """ + embeddings_list: List[Tensor] = [] + idf_scale_list: List[Tensor] = [] + for batch in _get_progress_bar(dataloader, verbose): + with B.no_grad(): + batch = _input_data_collator(batch, device) + # Output shape: batch_size x num_layers OR 1 x sequence_length x bert_dim + if not all_layers: + if not user_forward_fn: + out = model(batch["input_ids"], batch["attention_mask"], output_hidden_states=True) + out = out.hidden_states[num_layers if num_layers is not None else -1] + else: + out = user_forward_fn(model, batch) + _check_shape_of_model_output(out, batch["input_ids"]) + out = out.unsqueeze(1) + else: + if user_forward_fn: + raise ValueError( + "The option `all_layers=True` can be used only with default `transformers` models." + ) + out = model(batch["input_ids"], batch["attention_mask"], output_hidden_states=True) + out = B.cat([o.unsqueeze(1) for o in out.hidden_states], dim=1) + + out /= out.norm(dim=-1).unsqueeze(-1) # normalize embeddings + out, attention_mask = _output_data_collator(out, batch["attention_mask"], target_len) + processed_attention_mask = _process_attention_mask_for_special_tokens(attention_mask) + # Multiply embeddings with attention_mask (b=batch_size, l=num_layers, s=seq_len, d=emb_dim) + out = B.einsum("blsd, bs -> blsd", out, processed_attention_mask) + embeddings_list.append(out.cpu()) + + # Calculate weighted (w.r.t. sentence length) input_ids IDF matrix + input_ids_idf = ( + batch["input_ids_idf"] * processed_attention_mask if idf else processed_attention_mask.type(out.dtype) + ) + input_ids_idf /= input_ids_idf.sum(-1, keepdim=True) + idf_scale_list.append(input_ids_idf) + + embeddings = B.cat(embeddings_list) + idf_scale = B.cat(idf_scale_list) + + return embeddings, idf_scale + + +def _get_scaled_precision_or_recall(cos_sim: Tensor, metric: str, idf_scale: Tensor) -> Tensor: + """Helper function that calculates precision or recall, transpose it and scale it with idf_scale factor.""" + dim = 3 if metric == "precision" else 2 + res = cos_sim.max(dim=dim).values + res = B.einsum("bls, bs -> bls", res, idf_scale).sum(-1) + # We transpose the results and squeeze if possible to match the format of the original BERTScore implementation + res = res.transpose(0, 1).squeeze() + return res + + +def _get_precision_recall_f1( + pred_embeddings: Tensor, ref_embeddings: Tensor, pred_idf_scale: Tensor, ref_idf_scale: Tensor +) -> Tuple[Tensor, Tensor, Tensor]: + """Calculate precision, recall and F1 score over candidate and reference sentences. + + Args: + pred_embeddings: Embeddings of candidate sentenecs. + ref_embeddings: Embeddings of reference sentences. + pred_idf_scale: An IDF scale factor for candidate sentences. + ref_idf_scale: An IDF scale factor for reference sentences. + + Return: + Tensors containing precision, recall and F1 score, respectively. 
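+        Precision takes, for each candidate token, its highest cosine similarity over the reference
+        tokens (IDF-weighted and summed per sentence); recall does the converse for each reference
+        token; F1 is their harmonic mean, with undefined entries (precision + recall = 0) set to 0.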
+ """ + # Dimensions: b = batch_size, l = num_layers, p = predictions_seq_len, r = references_seq_len, d = bert_dim + cos_sim = B.einsum("blpd, blrd -> blpr", pred_embeddings, ref_embeddings) + # Final metrics shape = (batch_size * num_layers | batch_size) + precision = _get_scaled_precision_or_recall(cos_sim, "precision", pred_idf_scale) + recall = _get_scaled_precision_or_recall(cos_sim, "recall", ref_idf_scale) + + f1_score = 2 * precision * recall / (precision + recall) + f1_score = f1_score.masked_fill(B.isnan(f1_score), 0.0) + + return precision, recall, f1_score + + +def _get_hash(model_name_or_path: Optional[str] = None, num_layers: Optional[int] = None, idf: bool = False) -> str: + """Compute `BERT_score`_ (copied and adjusted)""" + msg = f"{model_name_or_path}_L{num_layers}{'_idf' if idf else '_no-idf'}" + return msg + + +def _read_csv_from_local_file(baseline_path: str) -> Tensor: + """Helper function which reads baseline the csv file from the local file. + + This method implemented to avoid `pandas` dependency. + """ + with open(baseline_path) as fname: + csv_file = csv.reader(fname) + baseline_list = [[float(item) for item in row] for idx, row in enumerate(csv_file) if idx > 0] + baseline = B.tensor(baseline_list)[:, 1:] + return baseline + + +def _read_csv_from_url(baseline_url: str) -> Tensor: + """Helper function which reads the baseline csv file from URL. + + This method is implemented to avoid `pandas` dependency. + """ + with urllib.request.urlopen(baseline_url) as http_request: # type: ignore + baseline_list = [ + [float(item) for item in row.strip().decode("utf-8").split(",")] + for idx, row in enumerate(http_request) + if idx > 0 + ] + baseline = B.tensor(baseline_list)[:, 1:] + return baseline + + +def _load_baseline( + lang: str = "en", + model_name_or_path: Optional[str] = None, + baseline_path: Optional[str] = None, + baseline_url: Optional[str] = None, +) -> Optional[Tensor]: + """Load a CSV file with the baseline values used for rescaling.""" + if baseline_path: + baseline: Optional[Tensor] = _read_csv_from_local_file(baseline_path) + elif baseline_url: + baseline = _read_csv_from_url(baseline_url) + # Read default baseline from the original `bert-score` package https://github.com/Tiiiger/bert_score + elif lang and model_name_or_path: + _URL_BASE = "https://raw.githubusercontent.com/Tiiiger/bert_score/master/bert_score/rescale_baseline" + baseline_url = f"{_URL_BASE}/{lang}/{model_name_or_path}.tsv" + baseline = _read_csv_from_url(baseline_url) + else: + baseline = None + warnings.warn("Baseline was not successfully loaded. 
No baseline is going to be used.") + + return baseline + + +def _rescale_metrics_with_baseline( + precision: Tensor, + recall: Tensor, + f1_score: Tensor, + baseline: Tensor, + num_layers: Optional[int] = None, + all_layers: bool = False, +) -> Tuple[Tensor, Tensor, Tensor]: + """Rescale the computed metrics with the pre-computed baseline.""" + if num_layers is None and all_layers is False: + num_layers = -1 + all_metrics = B.stack([precision, recall, f1_score], dim=-1) + baseline_scale = baseline.unsqueeze(1) if all_layers else baseline[num_layers] + all_metrics = (all_metrics - baseline_scale) / (1 - baseline_scale) + + return all_metrics[..., 0], all_metrics[..., 1], all_metrics[..., 2] + + +def bert_score( + predictions: Union[List[str], Dict[str, Tensor]], + references: Union[List[str], Dict[str, Tensor]], + model_name_or_path: Optional[str] = None, + num_layers: Optional[int] = None, + all_layers: bool = False, + model: Optional[nn.Module] = None, + user_tokenizer: Any = None, + user_forward_fn: Callable[[nn.Module, Dict[str, Tensor]], Tensor] = None, + verbose: bool = False, + idf: bool = False, + device: Optional[Union[str, B.device]] = None, + max_length: int = 512, + batch_size: int = 64, + num_threads: int = 4, + return_hash: bool = False, + lang: str = "en", + rescale_with_baseline: bool = False, + baseline_path: Optional[str] = None, + baseline_url: Optional[str] = None, +) -> Dict[str, Union[List[float], str]]: + """`Bert_score Evaluating Text Generation`_ leverages the pre-trained contextual embeddings from BERT and + matches words in candidate and reference sentences by cosine similarity. It has been shown to correlate with + human judgment on sentence-level and system-level evaluation. Moreover, BERTScore computes precision, recall, + and F1 measure, which can be useful for evaluating different language generation tasks. + + This implemenation follows the original implementation from `BERT_score`_ + + Args: + predictions: + Either an iterable of predicted sentences or a `Dict[str, B.Tensor]` containing `input_ids` and + `attention_mask` `B.Tensor`. + references: + Either an iterable of target sentences or a `Dict[str, B.Tensor]` containing `input_ids` and + `attention_mask` `B.Tensor`. + model_name_or_path: + A name or a model path used to load `transformers` pretrained model. + num_layers: + A layer of representation to use. + all_layers: + An indication of whether the representation from all model's layers should be used. + If `all_layers = True`, the argument `num_layers` is ignored. + model: + A user's own model. Must be of `nn.Module` instance. + user_tokenizer: + A user's own tokenizer used with the own model. This must be an instance with the `__call__` method. + This method must take an iterable of sentences (`List[str]`) and must return a python dictionary + containing `"input_ids"` and `"attention_mask"` represented by `B.Tensor`. It is up to the user's model + of whether `"input_ids"` is a `B.Tensor` of input ids or embedding vectors. + This tokenizer must prepend an equivalent of `[CLS]` token and append an equivalent of `[SEP]` token + as `transformers` tokenizer does. + user_forward_fn: + A user's own forward function used in a combination with `user_model`. This function must take `user_model` + and a python dictionary of containing `"input_ids"` and `"attention_mask"` represented by `B.Tensor` + as an input and return the model's output represented by the single `B.Tensor`. 
+ verbose: + An indication of whether a progress bar to be displayed during the embeddings calculation. + idf: + An indication of whether normalization using inverse document frequencies should be used. + device: + A device to be used for calculation. + max_length: + A maximum length of input sequences. Sequences longer than `max_length` are to be trimmed. + batch_size: + A batch size used for model processing. + num_threads: + A number of threads to use for a dataloader. + return_hash: + An indication of whether the correspodning `hash_code` should be returned. + lang: + A language of input sentences. It is used when the scores are rescaled with a baseline. + rescale_with_baseline: + An indication of whether bertscore should be rescaled with a pre-computed baseline. + When a pretrained model from `transformers` model is used, the corresponding baseline is downloaded + from the original `bert-score` package from `BERT_score`_ if available. + In other cases, please specify a path to the baseline csv/tsv file, which must follow the formatting + of the files from `BERT_score`_ + baseline_path: + A path to the user's own local csv/tsv file with the baseline scale. + baseline_url: + A url path to the user's own csv/tsv file with the baseline scale. + + Returns: + Python dictionary containing the keys `precision`, `recall` and `f1` with corresponding values. + + Raises: + ValueError: + If `len(predictions) != len(references)`. + ValueError: + If `tqdm` package is required and not installed. + ValueError: + If `transformers` package is required and not installed. + ValueError: + If `num_layer` is larger than the number of the model layers. + ValueError: + If invalid input is provided. + + Example: + >>> predictions = ["hello there", "general kenobi"] + >>> references = ["hello there", "master kenobi"] + >>> bert_score(predictions=predictions, references=references, lang="en") # doctest: +SKIP + {'precision': [0.99..., 0.99...], + 'recall': [0.99..., 0.99...], + 'f1': [0.99..., 0.99...]} + """ + if len(predictions) != len(references): + raise ValueError("Number of predicted and reference sententes must be the same!") + + if verbose and (not _TQDM_AVAILABLE): + raise ValueError( + "An argument `verbose = True` requires `tqdm` package be installed. Install with `pip install tqdm`." + ) + + if model is None: + if not _TRANSFORMERS_AVAILABLE: + raise ValueError( + "`bert_score` metric with default models requires `transformers` package be installed. " + "Either install with `pip install transformers>=4.0` or `pip install paddlemetrics[text]`" + ) + tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) + model = AutoModel.from_pretrained(model_name_or_path) + else: + tokenizer = user_tokenizer + model.eval() + model.to(device) + + try: + if num_layers and num_layers > model.config.num_hidden_layers: # type: ignore + raise ValueError( + f"num_layers={num_layers} is forbidden for {model_name_or_path}. 
" # type: ignore + f"Please use num_layers <= {model.config.num_hidden_layers}" # type: ignore + ) + except AttributeError: + warnings.warn("It was not possible to retrieve the parameter `num_layers` from the model specification.") + + _are_empty_lists = all(isinstance(text, list) and len(text) == 0 for text in (predictions, references)) + _are_valid_lists = all( + isinstance(text, list) and len(text) > 0 and isinstance(text[0], str) for text in (predictions, references) + ) + _are_valid_tensors = all( + isinstance(text, dict) and isinstance(text["input_ids"], Tensor) for text in (predictions, references) + ) + if _are_empty_lists: + warnings.warn("Predictions and references are empty.") + output_dict: Dict[str, Union[List[float], str]] = { + "precision": [0.0], + "recall": [0.0], + "f1": [0.0], + } + if return_hash: + output_dict.update({"hash": _get_hash(model_name_or_path, num_layers, idf)}) + return output_dict + + # Load baselines if needed + baseline = _load_baseline(lang, model_name_or_path, baseline_path, baseline_url) if rescale_with_baseline else None + + # We ignore mypy typing below as the proper typing is ensured by conditions above, only mypy cannot infer that. + if _are_valid_lists: + ref_dataset = TextDataset(references, tokenizer, max_length, idf=idf) # type: ignore + pred_dataset = TextDataset( + predictions, # type: ignore + tokenizer, + max_length, + idf=idf, + tokens_idf=ref_dataset.tokens_idf, + ) + elif _are_valid_tensors: + ref_dataset = TokenizedDataset(**references, idf=idf) # type: ignore + pred_dataset = TokenizedDataset(**predictions, idf=idf, tokens_idf=ref_dataset.tokens_idf) # type: ignore + else: + raise ValueError("Invalid input provided.") + + ref_loader = DataLoader(ref_dataset, batch_size=batch_size, num_workers=num_threads) + pred_loader = DataLoader(pred_dataset, batch_size=batch_size, num_workers=num_threads) + + ref_embeddings, ref_idf_scale = _get_embeddings_and_idf_scale( + ref_loader, ref_dataset.max_length, model, device, num_layers, all_layers, idf, verbose, user_forward_fn + ) + pred_embeddings, pred_idf_scale = _get_embeddings_and_idf_scale( + pred_loader, pred_dataset.max_length, model, device, num_layers, all_layers, idf, verbose, user_forward_fn + ) + + precision, recall, f1_score = _get_precision_recall_f1( + pred_embeddings, ref_embeddings, pred_idf_scale, ref_idf_scale + ) + + if baseline is not None: + precision, recall, f1_score = _rescale_metrics_with_baseline( + precision, recall, f1_score, baseline, num_layers, all_layers + ) + + output_dict = { + "precision": precision.tolist(), + "recall": recall.tolist(), + "f1": f1_score.tolist(), + } + if return_hash: + output_dict.update({"hash": _get_hash(model_name_or_path, num_layers, idf)}) + return output_dict diff --git a/RE/paddlemetric/src/paddlemetrics/functional/text/bleu.py b/RE/paddlemetric/src/paddlemetrics/functional/text/bleu.py new file mode 100644 index 00000000..4d00946b --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/text/bleu.py @@ -0,0 +1,171 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# referenced from +# Library Name: torchtext +# Authors: torchtext authors and @sluks +# Date: 2020-07-18 +# Link: https://pyB.org/text/_modules/torchtext/data/metrics.html#bleu_score +from collections import Counter +from typing import Sequence, Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + + +def _count_ngram(ngram_input_list: Sequence[str], n_gram: int) -> Counter: + """Counting how many times each word appears in a given text with ngram. + + Args: + ngram_input_list: A list of translated text or reference texts + n_gram: gram value ranged 1 to 4 + + Return: + ngram_counter: a collections.Counter object of ngram + """ + + ngram_counter: Counter = Counter() + + for i in range(1, n_gram + 1): + for j in range(len(ngram_input_list) - i + 1): + ngram_key = tuple(ngram_input_list[j : (i + j)]) + ngram_counter[ngram_key] += 1 + + return ngram_counter + + +def _bleu_score_update( + reference_corpus: Sequence[Sequence[Sequence[str]]], + translate_corpus: Sequence[Sequence[str]], + numerator: Tensor, + denominator: Tensor, + trans_len: Tensor, + ref_len: Tensor, + n_gram: int = 4, +) -> Tuple[Tensor, Tensor]: + """Updates and returns variables required to compute the BLEU score. + + Args: + reference_corpus: An iterable of iterables of reference corpus + translate_corpus: An iterable of machine translated corpus + numerator: Numerator of precision score (true positives) + denominator: Denominator of precision score (true positives + false positives) + trans_len: count of words in a candidate translation + ref_len: count of words in a reference translation + n_gram: gram value ranged 1 to 4 + """ + + for (translation, references) in zip(translate_corpus, reference_corpus): + trans_len += len(translation) + ref_len_list = [len(ref) for ref in references] + ref_len_diff = [abs(len(translation) - x) for x in ref_len_list] + ref_len += ref_len_list[ref_len_diff.index(min(ref_len_diff))] + translation_counter: Counter = _count_ngram(translation, n_gram) + reference_counter: Counter = Counter() + + for ref in references: + reference_counter |= _count_ngram(ref, n_gram) + + ngram_counter_clip = translation_counter & reference_counter + + for counter_clip in ngram_counter_clip: + numerator[len(counter_clip) - 1] += ngram_counter_clip[counter_clip] + + for counter in translation_counter: + denominator[len(counter) - 1] += translation_counter[counter] + + return trans_len, ref_len + + +def _bleu_score_compute( + trans_len: Tensor, ref_len: Tensor, numerator: Tensor, denominator: Tensor, n_gram: int = 4, smooth: bool = False +) -> Tensor: + """Computes the BLEU score. 
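+
+    Illustrative hand computation (an editorial sketch, not part of the original code): for the example
+    used in ``bleu_score`` below, the candidate ``"the cat is on the mat"`` scored against the references
+    ``"there is a cat on the mat"`` and ``"a cat is on the mat"`` gives clipped n-gram precisions of
+    5/6, 4/5, 3/4 and 2/3 for n = 1..4. The closest reference length equals the candidate length, so the
+    brevity penalty is 1.0 and the score is the geometric mean of the precisions:
+
+    >>> (5 / 6 * 4 / 5 * 3 / 4 * 2 / 3) ** 0.25  # doctest: +SKIP
+    0.7598...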
+ + Args: + trans_len: count of words in a candidate translation + ref_len: count of words in a reference translation + numerator: Numerator of precision score (true positives) + denominator: Denominator of precision score (true positives + false positives) + n_gram: gram value ranged 1 to 4 + smooth: Whether or not to apply smoothing + """ + device = numerator.device + if min(numerator) == 0.0: + return tensor(0.0, device=device) + + if smooth: + precision_scores = B.div( + B.add(numerator, B.ones(n_gram, device=device)), + B.add(denominator, B.ones(n_gram, device=device)), + ) + precision_scores[0] = numerator[0] / denominator[0] + else: + precision_scores = numerator / denominator + + log_precision_scores = tensor([1.0 / n_gram] * n_gram, device=device) * B.log(precision_scores) + geometric_mean = B.exp(B.sum(log_precision_scores)) + brevity_penalty = tensor(1.0, device=device) if trans_len > ref_len else B.exp(1 - (ref_len / trans_len)) + bleu = brevity_penalty * geometric_mean + + return bleu + + +def bleu_score( + reference_corpus: Sequence[Sequence[Sequence[str]]], + translate_corpus: Sequence[Sequence[str]], + n_gram: int = 4, + smooth: bool = False, +) -> Tensor: + """Calculate `BLEU score`_ of machine translated text with one or more references. + + Args: + reference_corpus: + An iterable of iterables of reference corpus + translate_corpus: + An iterable of machine translated corpus + n_gram: + Gram value ranged from 1 to 4 (Default 4) + smooth: + Whether or not to apply smoothing – see [2] + + Return: + Tensor with BLEU Score + + Example: + >>> from paddlemetrics.functional import bleu_score + >>> translate_corpus = ['the cat is on the mat'.split()] + >>> reference_corpus = [['there is a cat on the mat'.split(), 'a cat is on the mat'.split()]] + >>> bleu_score(reference_corpus, translate_corpus) + tensor(0.7598) + + References: + [1] BLEU: a Method for Automatic Evaluation of Machine Translation by Papineni, + Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu `BLEU`_ + + [2] Automatic Evaluation of Machine Translation Quality Using Longest Common Subsequence + and Skip-Bigram Statistics by Chin-Yew Lin and Franz Josef Och `Machine Translation Evolution`_ + """ + + if len(translate_corpus) != len(reference_corpus): + raise ValueError(f"Corpus has different size {len(translate_corpus)} != {len(reference_corpus)}") + numerator = B.zeros(n_gram) + denominator = B.zeros(n_gram) + trans_len = tensor(0, dtype=B.float) + ref_len = tensor(0, dtype=B.float) + + trans_len, ref_len = _bleu_score_update( + reference_corpus, translate_corpus, numerator, denominator, trans_len, ref_len, n_gram + ) + + return _bleu_score_compute(trans_len, ref_len, numerator, denominator, n_gram, smooth) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/text/rouge.py b/RE/paddlemetric/src/paddlemetrics/functional/text/rouge.py new file mode 100644 index 00000000..e83c00d0 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/text/rouge.py @@ -0,0 +1,325 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import re +from collections import Counter +from typing import Any, Dict, List, Optional, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.imports import _NLTK_AVAILABLE + +ALLOWED_ROUGE_KEYS: Dict[str, Union[int, str]] = { + "rouge1": 1, + "rouge2": 2, + "rouge3": 3, + "rouge4": 4, + "rouge5": 5, + "rouge6": 6, + "rouge7": 7, + "rouge8": 8, + "rouge9": 9, + "rougeL": "L", + "rougeLsum": "Lsum", +} + + +def _add_newline_to_end_of_each_sentence(x: str) -> str: + """This was added to get rougeLsum scores matching published rougeL scores for BART and PEGASUS.""" + if not _NLTK_AVAILABLE: + raise ValueError("ROUGE-Lsum calculation requires that nltk is installed. Use `pip install nltk`.") + import nltk + + nltk.download("punkt", quiet=True, force=False) + + re.sub("", "", x) # remove pegasus newline char + return "\n".join(nltk.sent_tokenize(x)) + + +def _compute_metrics(hits_or_lcs: int, pred_len: int, target_len: int) -> Dict[str, Tensor]: + """This computes precision, recall and F1 score based on hits/lcs, and the length of lists of tokenizer + predicted and target sentences. + + Args: + hits_or_lcs: + A number of matches or a length of the longest common subsequence. + pred_len: + A length of a tokenized predicted sentence. + target_len: + A length of a tokenized target sentence. + """ + precision = hits_or_lcs / pred_len + recall = hits_or_lcs / target_len + if precision == recall == 0.0: + return dict(precision=tensor(0.0), recall=tensor(0.0), fmeasure=tensor(0.0)) + + fmeasure = 2 * precision * recall / (precision + recall) + return dict(precision=tensor(precision), recall=tensor(recall), fmeasure=tensor(fmeasure)) + + +def _lcs(pred_tokens: List[str], target_tokens: List[str]) -> int: + """Common DP algorithm to compute the length of the longest common subsequence. + + Args: + pred_tokens: + A tokenized predicted sentence. + target_toknes: + A tokenized target sentence. + """ + LCS = [[0] * (len(pred_tokens) + 1) for _ in range(len(target_tokens) + 1)] + for i in range(1, len(target_tokens) + 1): + for j in range(1, len(pred_tokens) + 1): + if target_tokens[i - 1] == pred_tokens[j - 1]: + LCS[i][j] = LCS[i - 1][j - 1] + 1 + else: + LCS[i][j] = max(LCS[i - 1][j], LCS[i][j - 1]) + return LCS[-1][-1] + + +def _normalize_and_tokenize_text(text: str, stemmer: Optional[Any] = None) -> List[str]: + """Rouge score should be calculated only over lowercased words and digits. Optionally, Porter stemmer can be + used to strip word suffixes to improve matching. The text normalization follows the implemantion from `Rouge + score_Text Normalizition`_ + + Args: + text: + An input sentence. + stemmer: + Porter stemmer instance to strip word suffixes to improve matching. + """ + # Replace any non-alpha-numeric characters with spaces. + text = re.sub(r"[^a-z0-9]+", " ", text.lower()) + + tokens = re.split(r"\s+", text) + if stemmer: + # Only stem words more than 3 characters long. + tokens = [stemmer.stem(x) if len(x) > 3 else x for x in tokens] + + # One final check to drop any empty or invalid tokens. + tokens = [x for x in tokens if (isinstance(x, str) and re.match(r"^[a-z0-9]+$", x))] + + return tokens + + +def _rouge_n_score(pred: List[str], target: List[str], n_gram: int) -> Dict[str, Tensor]: + """This computes precision, recall and F1 score for the Rouge-N metric. + + Args: + pred: + A predicted sentence. 
+ target: + A target sentence. + n_gram: + N-gram overlap. + """ + + def _create_ngrams(tokens: List[str], n: int) -> Counter: + ngrams: Counter = Counter() + for ngram in (tuple(tokens[i : i + n]) for i in range(len(tokens) - n + 1)): + ngrams[ngram] += 1 + return ngrams + + pred_ngrams, target_ngrams = _create_ngrams(pred, n_gram), _create_ngrams(target, n_gram) + pred_len, target_len = sum(pred_ngrams.values()), sum(target_ngrams.values()) + if 0 in (pred_len, target_len): + return dict(precision=tensor(0.0), recall=tensor(0.0), fmeasure=tensor(0.0)) + + # It is sufficient to take a set(pred_tokenized) for hits count as we consider intersenction of pred & target + hits = sum(min(pred_ngrams[w], target_ngrams[w]) for w in set(pred_ngrams)) + return _compute_metrics(hits, max(pred_len, 1), max(target_len, 1)) + + +def _rouge_l_score(pred: List[str], target: List[str]) -> Dict[str, Tensor]: + """This computes precision, recall and F1 score for the Rouge-L or Rouge-LSum metric. + + Args: + pred: + A predicted sentence. + target: + A target sentence. + """ + pred_len, target_len = len(pred), len(target) + if 0 in (pred_len, target_len): + return dict(precision=tensor(0.0), recall=tensor(0.0), fmeasure=tensor(0.0)) + + lcs = _lcs(pred, target) + return _compute_metrics(lcs, pred_len, target_len) + + +def _rouge_score_update( + preds: List[str], + targets: List[str], + rouge_keys_values: List[Union[int, str]], + stemmer: Optional[Any] = None, +) -> Dict[Union[int, str], List[Dict[str, Tensor]]]: + """Update the rouge score with the current set of predicted and target sentences. + + Args: + preds: + An iterable of predicted sentences. + targets: + An iterable of target sentences. + rouge_keys_values: + List of N-grams/'L'/'Lsum' arguments. + stemmer: + Porter stemmer instance to strip word suffixes to improve matching. 
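+
+    A per-key sketch (an editorial illustration, not part of the original docstring): each predicted and
+    target pair is scored with ``_rouge_n_score`` for integer keys and ``_rouge_l_score`` for ``'L'`` and
+    ``'Lsum'``, e.g. on two already tokenized sentences:
+
+    >>> _rouge_n_score("my name is john".split(), "is your name john".split(), n_gram=1)  # doctest: +SKIP
+    {'precision': tensor(0.7500), 'recall': tensor(0.7500), 'fmeasure': tensor(0.7500)}
+    >>> _rouge_l_score("my name is john".split(), "is your name john".split())  # doctest: +SKIP
+    {'precision': tensor(0.5000), 'recall': tensor(0.5000), 'fmeasure': tensor(0.5000)}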
+ + Example: + >>> targets = "Is your name John".split() + >>> preds = "My name is John".split() + >>> from pprint import pprint + >>> score = _rouge_score_update(preds, targets, rouge_keys_values=[1, 2, 3, 'L']) + >>> pprint(score) # doctest: +NORMALIZE_WHITESPACE +SKIP + {1: [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(1.), 'precision': tensor(1.), 'recall': tensor(1.)}], + 2: [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}], + 3: [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}], + 'L': [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(1.), 'precision': tensor(1.), 'recall': tensor(1.)}]} + """ + results: Dict[Union[int, str], List[Dict[str, Tensor]]] = {rouge_key: [] for rouge_key in rouge_keys_values} + for pred_raw, target_raw in zip(preds, targets): + pred = _normalize_and_tokenize_text(pred_raw, stemmer) + target = _normalize_and_tokenize_text(target_raw, stemmer) + + if "Lsum" in rouge_keys_values: + # rougeLsum expects "\n" separated sentences within a summary + pred_Lsum = _normalize_and_tokenize_text(_add_newline_to_end_of_each_sentence(pred_raw), stemmer) + target_Lsum = _normalize_and_tokenize_text(_add_newline_to_end_of_each_sentence(target_raw), stemmer) + + for rouge_key in rouge_keys_values: + if isinstance(rouge_key, int): + score = _rouge_n_score(pred, target, rouge_key) + else: + score = _rouge_l_score( + pred if rouge_key != "Lsum" else pred_Lsum, + target if rouge_key != "Lsum" else target_Lsum, + ) + results[rouge_key].append(score) + return results + + +def _rouge_score_compute(sentence_results: Dict[str, List[Tensor]]) -> Dict[str, Tensor]: + """Compute the combined ROUGE metric for all the input set of predicted and target sentences. + + Args: + sentence_results: + Rouge-N/Rouge-L/Rouge-LSum metrics calculated for single sentence. + """ + results: Dict[str, Tensor] = {} + # Obtain mean scores for individual rouge metrics + if sentence_results == {}: + return results + + for rouge_key, scores in sentence_results.items(): + results[rouge_key] = B.tensor(scores).mean() + + return results + + +def rouge_score( + preds: Union[str, List[str]], + targets: Union[str, List[str]], + use_stemmer: bool = False, + rouge_keys: Union[str, Tuple[str, ...]] = ("rouge1", "rouge2", "rougeL", "rougeLsum"), # type: ignore +) -> Dict[str, Tensor]: + """Calculate `Calculate Rouge Score`_ , used for automatic summarization. + + Args: + preds: + An iterable of predicted sentences. + targets: + An iterable of target sentences. + use_stemmer: + Use Porter stemmer to strip word suffixes to improve matching. + rouge_keys: + A list of rouge types to calculate. 
+ Keys that are allowed are ``rougeL``, ``rougeLsum``, and ``rouge1`` through ``rouge9``. + + Return: + Python dictionary of rouge scores for each input rouge key. + + Example: + >>> targets = "Is your name John".split() + >>> preds = "My name is John".split() + >>> from pprint import pprint + >>> pprint(rouge_score(preds, targets)) # doctest: +NORMALIZE_WHITESPACE +SKIP + {'rouge1_fmeasure': 0.25, + 'rouge1_precision': 0.25, + 'rouge1_recall': 0.25, + 'rouge2_fmeasure': 0.0, + 'rouge2_precision': 0.0, + 'rouge2_recall': 0.0, + 'rougeL_fmeasure': 0.25, + 'rougeL_precision': 0.25, + 'rougeL_recall': 0.25, + 'rougeLsum_fmeasure': 0.25, + 'rougeLsum_precision': 0.25, + 'rougeLsum_recall': 0.25} + + Raises: + ValueError: + If the python package ``nltk`` is not installed. + ValueError: + If any of the ``rouge_keys`` does not belong to the allowed set of keys. + + References: + [1] ROUGE: A Package for Automatic Evaluation of Summaries by Chin-Yew Lin. https://aclanthology.org/W04-1013/ + """ + + if use_stemmer: + if not _NLTK_AVAILABLE: + raise ValueError("Stemmer requires that nltk is installed. Use `pip install nltk`.") + import nltk + + stemmer = nltk.stem.porter.PorterStemmer() if use_stemmer else None + + if not isinstance(rouge_keys, tuple): + rouge_keys = tuple([rouge_keys]) + for key in rouge_keys: + if key not in ALLOWED_ROUGE_KEYS.keys(): + raise ValueError(f"Got unknown rouge key {key}. Expected to be one of {list(ALLOWED_ROUGE_KEYS.keys())}") + rouge_keys_values = [ALLOWED_ROUGE_KEYS[key] for key in rouge_keys] + + if isinstance(preds, str): + preds = [preds] + + if isinstance(targets, str): + targets = [targets] + + sentence_results: Dict[Union[int, str], List[Dict[str, Tensor]]] = _rouge_score_update( + preds, targets, rouge_keys_values, stemmer=stemmer + ) + + output: Dict[str, List[Tensor]] = {} + for rouge_key in rouge_keys_values: + for type in ["fmeasure", "precision", "recall"]: + output[f"rouge{rouge_key}_{type}"] = [] + + for rouge_key, metrics in sentence_results.items(): + for metric in metrics: + for type, value in metric.items(): + output[f"rouge{rouge_key}_{type}"].append(value) + + return _rouge_score_compute(output) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/text/sacre_bleu.py b/RE/paddlemetric/src/paddlemetrics/functional/text/sacre_bleu.py new file mode 100644 index 00000000..1a59377f --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/text/sacre_bleu.py @@ -0,0 +1,355 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# referenced from +# Library Name: torchtext +# Authors: torchtext authors and @sluks +# Date: 2020-07-18 +# Link: https://pyB.org/text/_modules/torchtext/data/metrics.html#bleu_score + +############## + +# Copyright 2017--2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not +# use this file except in compliance with the License. 
A copy of the License +# is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. + +############## + +# MIT License +# Copyright (c) 2017 - Shujian Huang + + +import re +from typing import Sequence + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor +from typing_extensions import Literal + +from paddlemetrics.functional.text.bleu import _bleu_score_compute, _bleu_score_update +from paddlemetrics.utilities.imports import _REGEX_AVAILABLE + +AVAILABLE_TOKENIZERS = ("none", "13a", "zh", "intl", "char") + +_UCODE_RANGES = ( + ("\u3400", "\u4db5"), # CJK Unified Ideographs Extension A, release 3.0 + ("\u4e00", "\u9fa5"), # CJK Unified Ideographs, release 1.1 + ("\u9fa6", "\u9fbb"), # CJK Unified Ideographs, release 4.1 + ("\uf900", "\ufa2d"), # CJK Compatibility Ideographs, release 1.1 + ("\ufa30", "\ufa6a"), # CJK Compatibility Ideographs, release 3.2 + ("\ufa70", "\ufad9"), # CJK Compatibility Ideographs, release 4.1 + ("\u20000", "\u2a6d6"), # (UTF16) CJK Unified Ideographs Extension B, release 3.1 + ("\u2f800", "\u2fa1d"), # (UTF16) CJK Compatibility Supplement, release 3.1 + ("\uff00", "\uffef"), # Full width ASCII, full width of English punctuation, + # half width Katakana, half wide half width kana, Korean alphabet + ("\u2e80", "\u2eff"), # CJK Radicals Supplement + ("\u3000", "\u303f"), # CJK punctuation mark + ("\u31c0", "\u31ef"), # CJK stroke + ("\u2f00", "\u2fdf"), # Kangxi Radicals + ("\u2ff0", "\u2fff"), # Chinese character structure + ("\u3100", "\u312f"), # Phonetic symbols + ("\u31a0", "\u31bf"), # Phonetic symbols (Taiwanese and Hakka expansion) + ("\ufe10", "\ufe1f"), + ("\ufe30", "\ufe4f"), + ("\u2600", "\u26ff"), + ("\u2700", "\u27bf"), + ("\u3200", "\u32ff"), + ("\u3300", "\u33ff"), +) + + +class _SacreBLEUTokenizer: + """Tokenizer used for SacreBLEU calculation. 
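+
+    Usage sketch (an editorial addition; output shown for illustration only): the class-level ``tokenize``
+    helper applies the selected scheme and returns the tokens, e.g. the default ``13a`` scheme splits
+    punctuation into separate tokens:
+
+    >>> _SacreBLEUTokenizer.tokenize("Hello, world!", "13a")  # doctest: +SKIP
+    ['Hello', ',', 'world', '!']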
+ + Source: https://github.com/mjpost/sacrebleu/tree/master/sacrebleu/tokenizers + """ + + _REGEX = ( + # language-dependent part (assuming Western languages) + (re.compile(r"([\{-\~\[-\` -\&\(-\+\:-\@\/])"), r" \1 "), + # tokenize period and comma unless preceded by a digit + (re.compile(r"([^0-9])([\.,])"), r"\1 \2 "), + # tokenize period and comma unless followed by a digit + (re.compile(r"([\.,])([^0-9])"), r" \1 \2"), + # tokenize dash when preceded by a digit + (re.compile(r"([0-9])(-)"), r"\1 \2 "), + # one space only between words + # NOTE: Doing this in Python (below) is faster + # (re.compile(r'\s+'), r' '), + ) + + if _REGEX_AVAILABLE: + import regex + + _INT_REGEX = ( + # Separate out punctuations preceeded by a non-digit + (regex.compile(r"(\P{N})(\p{P})"), r"\1 \2 "), + # Separate out punctuations followed by a non-digit + (regex.compile(r"(\p{P})(\P{N})"), r" \1 \2"), + # Separate out symbols + (regex.compile(r"(\p{S})"), r" \1 "), + ) + + _TOKENIZE_FN = { + "none": "_tokenize_base", + "13a": "_tokenize_13a", + "zh": "_tokenize_zh", + "intl": "_tokenize_international", + "char": "_tokenize_char", + } + + def __init__(self, tokenize: Literal["none", "13a", "zh", "intl", "char"], lowercase: bool = False) -> None: + self.tokenize_fn = getattr(self, self._TOKENIZE_FN[tokenize]) + self.lowercase = lowercase + + def __call__(self, line: str) -> Sequence[str]: + tokenized_line = self.tokenize_fn(line) + return self._lower(tokenized_line, self.lowercase).split() + + @classmethod + def tokenize( + cls, line: str, tokenize: Literal["none", "13a", "zh", "intl", "char"], lowercase: bool = False + ) -> Sequence[str]: + tokenize_fn = getattr(cls, cls._TOKENIZE_FN[tokenize]) + tokenized_line = tokenize_fn(line) + return cls._lower(tokenized_line, lowercase).split() + + @classmethod + def _tokenize_regex(cls, line: str) -> str: + """Common post-processing tokenizer for `13a` and `zh` tokenizers. + Args: + line: a segment to tokenize + + Return: + the tokenized line + """ + for (_re, repl) in cls._REGEX: + line = _re.sub(repl, line) + # no leading or trailing spaces, single space within words + return " ".join(line.split()) + + @staticmethod + def _is_chinese_char(uchar: str) -> bool: + """ + Args: + uchar: input char in unicode + + Return: + whether the input char is a Chinese character. + """ + for start, end in _UCODE_RANGES: + if start <= uchar <= end: + return True + return False + + @classmethod + def _tokenize_base(cls, line: str) -> str: + """Tokenizes an input line with the tokenizer. + + Args: + line: a segment to tokenize + + Return: + the tokenized line + """ + return line + + @classmethod + def _tokenize_13a(cls, line: str) -> str: + """Tokenizes an input line using a relatively minimal tokenization that is however equivalent to + mteval-v13a, used by WMT. + + Args: + line: input sentence + + Return: + tokenized sentence + """ + # language-independent part: + line = line.replace("", "") + line = line.replace("-\n", "") + line = line.replace("\n", " ") + + if "&" in line: + line = line.replace(""", '"') + line = line.replace("&", "&") + line = line.replace("<", "<") + line = line.replace(">", ">") + + return cls._tokenize_regex(line) + + @classmethod + def _tokenize_zh(cls, line: str) -> str: + """The tokenization of Chinese text in this script contains two + steps: separate each Chinese characters (by utf-8 encoding); tokenize + the non Chinese part (following the `13a` i.e. mteval tokenizer). 
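+
+        For instance (an editorial sketch; output shown for illustration only), a purely Chinese segment
+        is split into individual characters:
+
+        >>> _SacreBLEUTokenizer.tokenize("猫坐在垫子上", "zh")  # doctest: +SKIP
+        ['猫', '坐', '在', '垫', '子', '上']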
+ Author: Shujian Huang huangsj@nju.edu.cn + + Args: + line: input sentence + + Return: + tokenized sentence + """ + + line = line.strip() + line_in_chars = "" + + for char in line: + if cls._is_chinese_char(char): + line_in_chars += " " + line_in_chars += char + line_in_chars += " " + else: + line_in_chars += char + + return cls._tokenize_regex(line_in_chars) + + @classmethod + def _tokenize_international(cls, line: str) -> str: + """Tokenizes a string following the official BLEU implementation. + + See github.com/moses-smt/mosesdecoder/blob/master/scripts/generic/mteval-v14.pl#L954-L983 + + In our case, the input string is expected to be just one line. + We just tokenize on punctuation and symbols, + except when a punctuation is preceded and followed by a digit + (e.g. a comma/dot as a thousand/decimal separator). + We do not recover escaped forms of punctuations such as ' or > + as these should never appear in MT system outputs (see issue #138) + + Note that a number (e.g., a year) followed by a dot at the end of + sentence is NOT tokenized, i.e. the dot stays with the number because + `s/(\\p{P})(\\P{N})/ $1 $2/g` does not match this case (unless we add a + space after each sentence). However, this error is already in the + original mteval-v14.pl and we want to be consistent with it. + The error is not present in the non-international version, + which uses `$norm_text = " $norm_text "`. + + Args: + line: the input string to tokenize. + + Return: + The tokenized string. + """ + for (_re, repl) in cls._INT_REGEX: + line = _re.sub(repl, line) + + return " ".join(line.split()) + + @classmethod + def _tokenize_char(cls, line: str) -> str: + """Tokenizes all the characters in the input line. + + Args: + line: a segment to tokenize + + Return: + the tokenized line + """ + return " ".join(char for char in line) + + @staticmethod + def _lower(line: str, lowercase: bool) -> str: + if lowercase: + return line.lower() + return line + + +def sacre_bleu_score( + reference_corpus: Sequence[Sequence[str]], + translate_corpus: Sequence[str], + n_gram: int = 4, + smooth: bool = False, + tokenize: Literal["none", "13a", "zh", "intl", "char"] = "13a", + lowercase: bool = False, +) -> Tensor: + """Calculate `BLEU score`_ [1] of machine translated text with one or more references. This implementation + follows the behaviour of SacreBLEU [2] implementation from https://github.com/mjpost/sacrebleu. + + Args: + reference_corpus: + An iterable of iterables of reference corpus + translate_corpus: + An iterable of machine translated corpus + n_gram: + Gram value ranged from 1 to 4 (Default 4) + smooth: + Whether or not to apply smoothing – see [2] + tokenize: + Tokenization technique to be used. (Default '13a') + Supported tokenization: ['none', '13a', 'zh', 'intl', 'char'] + lowercase: + If ``True``, BLEU score over lowercased text is calculated. + + Return: + Tensor with BLEU Score + + Example: + >>> from paddlemetrics.functional import sacre_bleu_score + >>> translate_corpus = ['the cat is on the mat'] + >>> reference_corpus = [['there is a cat on the mat', 'a cat is on the mat']] + >>> sacre_bleu_score(reference_corpus, translate_corpus) + tensor(0.7598) + + References: + [1] BLEU: a Method for Automatic Evaluation of Machine Translation by Papineni, + Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu `BLEU`_ + + [2] A Call for Clarity in Reporting BLEU Scores by Matt Post. 
+ + [3] Automatic Evaluation of Machine Translation Quality Using Longest Common Subsequence + and Skip-Bigram Statistics by Chin-Yew Lin and Franz Josef Och `Machine Translation Evolution`_ + """ + if tokenize not in AVAILABLE_TOKENIZERS: + raise ValueError(f"Argument `tokenize` expected to be one of {AVAILABLE_TOKENIZERS} but got {tokenize}.") + + if tokenize not in _SacreBLEUTokenizer._TOKENIZE_FN.keys(): + raise ValueError( + f"Unsupported tokenizer selected. Please, choose one of {list(_SacreBLEUTokenizer._TOKENIZE_FN.keys())}" + ) + if len(translate_corpus) != len(reference_corpus): + raise ValueError(f"Corpus has different size {len(translate_corpus)} != {len(reference_corpus)}") + if tokenize == "intl" and not _REGEX_AVAILABLE: + raise ValueError( + "`'intl'` tokenization requires `regex` installed. Use `pip install regex` or `pip install " + "paddlemetrics[text]`." + ) + + reference_corpus_: Sequence[Sequence[Sequence[str]]] = [ + [_SacreBLEUTokenizer.tokenize(line, tokenize, lowercase) for line in reference] + for reference in reference_corpus + ] + translate_corpus_: Sequence[Sequence[str]] = [ + _SacreBLEUTokenizer.tokenize(line, tokenize, lowercase) for line in translate_corpus + ] + + numerator = B.zeros(n_gram) + denominator = B.zeros(n_gram) + trans_len = tensor(0, dtype=B.float) + ref_len = tensor(0, dtype=B.float) + + trans_len, ref_len = _bleu_score_update( + reference_corpus_, translate_corpus_, numerator, denominator, trans_len, ref_len, n_gram + ) + + return _bleu_score_compute(trans_len, ref_len, numerator, denominator, n_gram, smooth) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/text/wer.py b/RE/paddlemetric/src/paddlemetrics/functional/text/wer.py new file mode 100644 index 00000000..4cd19b05 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/text/wer.py @@ -0,0 +1,114 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List, Optional, Tuple, Union +from warnings import warn + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + + +def _edit_distance(prediction_tokens: List[str], reference_tokens: List[str]) -> int: + """Standard dynamic programming algorithm to compute the edit distance. 
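+
+    For example (an editorial sketch), a single deletion or a single substitution both cost one edit:
+
+    >>> _edit_distance("who is there".split(), "is there".split())
+    1
+    >>> _edit_distance("hello world".split(), "hello there".split())
+    1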
+ + Args: + prediction_tokens: A tokenized predicted sentence + reference_tokens: A tokenized reference sentence + + Returns: + (int) Edit distance between the predicted sentence and the reference sentence + """ + dp = [[0] * (len(reference_tokens) + 1) for _ in range(len(prediction_tokens) + 1)] + for i in range(len(prediction_tokens) + 1): + dp[i][0] = i + for j in range(len(reference_tokens) + 1): + dp[0][j] = j + for i in range(1, len(prediction_tokens) + 1): + for j in range(1, len(reference_tokens) + 1): + if prediction_tokens[i - 1] == reference_tokens[j - 1]: + dp[i][j] = dp[i - 1][j - 1] + else: + dp[i][j] = min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]) + 1 + return dp[-1][-1] + + +def _wer_update( + predictions: Union[str, List[str]], + references: Union[str, List[str]], +) -> Tuple[Tensor, Tensor]: + """Update the wer score with the current set of references and predictions. + + Args: + predictions: Transcription(s) to score as a string or list of strings + references: Reference(s) for each speech input as a string or list of strings + + Returns: + (Tensor) Number of edit operations to get from the reference to the prediction, summed over all samples + (Tensor) Number of words over all references + """ + if isinstance(predictions, str): + predictions = [predictions] + if isinstance(references, str): + references = [references] + errors = tensor(0, dtype=B.float) + total = tensor(0, dtype=B.float) + for prediction, reference in zip(predictions, references): + prediction_tokens = prediction.split() + reference_tokens = reference.split() + errors += _edit_distance(prediction_tokens, reference_tokens) + total += len(reference_tokens) + return errors, total + + +def _wer_compute(errors: Tensor, total: Tensor) -> Tensor: + """Compute the word error rate. + + Args: + errors: Number of edit operations to get from the reference to the prediction, summed over all samples + total: Number of words over all references + + Returns: + (Tensor) Word error rate + """ + return errors / total + + +def wer( + predictions: Union[str, List[str]], + references: Union[str, List[str]], + concatenate_texts: Optional[bool] = None, # TODO: remove in v0.7 +) -> Tensor: + """Word error rate (WER_) is a common metric of the performance of an automatic speech recognition system. This + value indicates the percentage of words that were incorrectly predicted. The lower the value, the better the + performance of the ASR system with a WER of 0 being a perfect score. + + Args: + predictions: Transcription(s) to score as a string or list of strings + references: Reference(s) for each speech input as a string or list of strings + concatenate_texts: Whether to concatenate all input texts or compute WER iteratively + This argument is deprecated in v0.6 and it will be removed in v0.7. 
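+
+    Internally (an editorial note), ``_wer_update`` sums edit operations and reference word counts over
+    the whole corpus and ``_wer_compute`` divides the two totals, so the result is a corpus-level rate
+    rather than an average of per-sentence rates:
+
+        >>> _wer_update(["hello world"], ["hello there world"])  # doctest: +SKIP
+        (tensor(1.), tensor(3.))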
+ + Returns: + (Tensor) Word error rate + + Examples: + >>> predictions = ["this is the prediction", "there is an other sample"] + >>> references = ["this is the reference", "there is another one"] + >>> wer(predictions=predictions, references=references) + tensor(0.5000) + """ + if concatenate_texts is not None: + warn("`concatenate_texts` has been deprecated in v0.6 and it will be removed in v0.7", DeprecationWarning) + errors, total = _wer_update(predictions, references) + return _wer_compute(errors, total) diff --git a/RE/paddlemetric/src/paddlemetrics/image/__init__.py b/RE/paddlemetric/src/paddlemetrics/image/__init__.py new file mode 100644 index 00000000..c3fb3568 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/image/__init__.py @@ -0,0 +1,19 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#from paddlemetrics.image.fid import FID # noqa: F401 +from paddlemetrics.image.inception import IS # noqa: F401 +from paddlemetrics.image.kid import KID # noqa: F401 +from paddlemetrics.image.lpip_similarity import LPIPS # noqa: F401 +from paddlemetrics.image.psnr import PSNR # noqa: F401 +from paddlemetrics.image.ssim import SSIM # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/image/fid.py b/RE/paddlemetric/src/paddlemetrics/image/fid.py new file mode 100644 index 00000000..6f2965db --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/image/fid.py @@ -0,0 +1,283 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
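+
+# ---------------------------------------------------------------------------------------
+# Editorial reference sketch (not used by this module): the closed-form FID between two
+# Gaussian feature distributions written with plain NumPy/SciPy, mirroring the formula
+# implemented by `_compute_fid` below. The helper name and defaults are illustrative only.
+# ---------------------------------------------------------------------------------------
+def _fid_numpy_sketch(real_feats, fake_feats, eps=1e-6):
+    """Return the FID between two [N, d] NumPy feature arrays (illustrative only)."""
+    import numpy as _np
+    from scipy import linalg as _linalg
+
+    mu1, mu2 = real_feats.mean(axis=0), fake_feats.mean(axis=0)
+    sigma1 = _np.cov(real_feats, rowvar=False)
+    sigma2 = _np.cov(fake_feats, rowvar=False)
+    diff = mu1 - mu2
+    # matrix square root of the covariance product; retry with a small diagonal offset if singular
+    covmean, _ = _linalg.sqrtm(sigma1.dot(sigma2), disp=False)
+    if not _np.isfinite(covmean).all():
+        offset = _np.eye(sigma1.shape[0]) * eps
+        covmean, _ = _linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset), disp=False)
+    return diff.dot(diff) + _np.trace(sigma1) + _np.trace(sigma2) - 2 * _np.trace(covmean.real)
+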
+from typing import Any, Callable, List, Optional, Union + +import numpy as np +import paddleext.torchapi as B +from paddleext.torchapi import Tensor +from paddleext.torchapi.autograd import Function + +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_info, rank_zero_warn +from paddlemetrics.utilities.data import dim_zero_cat +from paddlemetrics.utilities.imports import _SCIPY_AVAILABLE, _TORCH_FIDELITY_AVAILABLE + +if _TORCH_FIDELITY_AVAILABLE: + from torch_fidelity.feature_extractor_inceptionv3 import FeatureExtractorInceptionV3 +else: + + class FeatureExtractorInceptionV3(B.nn.Module): # type: ignore + pass + + +if _SCIPY_AVAILABLE: + import scipy + + +class NoTrainInceptionV3(FeatureExtractorInceptionV3): + def __init__( + self, + name: str, + features_list: List[str], + feature_extractor_weights_path: Optional[str] = None, + ) -> None: + super().__init__(name, features_list, feature_extractor_weights_path) + # put into evaluation mode + self.eval() + + def train(self, mode: bool) -> "NoTrainInceptionV3": + """the inception network should not be able to be switched away from evaluation mode.""" + return super().train(False) + + def forward(self, x: Tensor) -> Tensor: + out = super().forward(x) + return out[0].reshape(x.shape[0], -1) + + +class MatrixSquareRoot(Function): + """Square root of a positive definite matrix. + + All credit to: `Square Root of a Positive Definite Matrix`_ + """ + + @staticmethod + def forward(ctx: Any, input_data: Tensor) -> Tensor: + # TODO: update whenever pytorch gets an matrix square root function + # Issue: https://github.com/pytorch/pytorch/issues/9983 + m = input_data.detach().cpu().numpy().astype(np.float_) + scipy_res, _ = scipy.linalg.sqrtm(m, disp=False) + sqrtm = B.from_numpy(scipy_res.real).to(input_data) + ctx.save_for_backward(sqrtm) + return sqrtm + + @staticmethod + def backward(ctx: Any, grad_output: Tensor) -> Tensor: + grad_input = None + if ctx.needs_input_grad[0]: + (sqrtm,) = ctx.saved_tensors + sqrtm = sqrtm.data.cpu().numpy().astype(np.float_) + gm = grad_output.data.cpu().numpy().astype(np.float_) + + # Given a positive semi-definite matrix X, + # since X = X^{1/2}X^{1/2}, we can compute the gradient of the + # matrix square root dX^{1/2} by solving the Sylvester equation: + # dX = (d(X^{1/2})X^{1/2} + X^{1/2}(dX^{1/2}). + grad_sqrtm = scipy.linalg.solve_sylvester(sqrtm, sqrtm, gm) + + grad_input = B.from_numpy(grad_sqrtm).to(grad_output) + return grad_input + + +sqrtm = MatrixSquareRoot.apply + + +def _compute_fid(mu1: Tensor, sigma1: Tensor, mu2: Tensor, sigma2: Tensor, eps: float = 1e-6) -> Tensor: + r""" + Adjusted version of `Fid Score`_ + + The Frechet Inception Distance between two multivariate Gaussians X_x ~ N(mu_1, sigm_1) + and X_y ~ N(mu_2, sigm_2) is d^2 = ||mu_1 - mu_2||^2 + Tr(sigm_1 + sigm_2 - 2*sqrt(sigm_1*sigm_2)). + + Args: + mu1: mean of activations calculated on predicted (x) samples + sigma1: covariance matrix over activations calculated on predicted (x) samples + mu2: mean of activations calculated on target (y) samples + sigma2: covariance matrix over activations calculated on target (y) samples + eps: offset constant. used if sigma_1 @ sigma_2 matrix is singular + + Returns: + Scalar value of the distance between sets. 
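+
+    Example (an editorial sketch, skipped in doctests since it needs ``scipy``): two identical Gaussians
+    are at distance zero.
+
+        >>> mu, sigma = B.zeros(2), B.eye(2)  # doctest: +SKIP
+        >>> _compute_fid(mu, sigma, mu, sigma)  # doctest: +SKIP
+        tensor(0.)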
+ """ + diff = mu1 - mu2 + + covmean = sqrtm(sigma1.mm(sigma2)) + # Product might be almost singular + if not B.isfinite(covmean).all(): + rank_zero_info(f"FID calculation produces singular product; adding {eps} to diagonal of covariance estimates") + offset = B.eye(sigma1.size(0), device=mu1.device, dtype=mu1.dtype) * eps + covmean = sqrtm((sigma1 + offset).mm(sigma2 + offset)) + + tr_covmean = B.trace(covmean) + return diff.dot(diff) + B.trace(sigma1) + B.trace(sigma2) - 2 * tr_covmean + + +class FID(Metric): + r""" + Calculates Fréchet inception distance (FID_) which is used to access the quality of generated images. Given by + + .. math:: + FID = |\mu - \mu_w| + tr(\Sigma + \Sigma_w - 2(\Sigma \Sigma_w)^{\frac{1}{2}}) + + where :math:`\mathcal{N}(\mu, \Sigma)` is the multivariate normal distribution estimated from Inception v3 [1] + features calculated on real life images and :math:`\mathcal{N}(\mu_w, \Sigma_w)` is the multivariate normal + distribution estimated from Inception v3 features calculated on generated (fake) images. The metric was + originally proposed in [1]. + + Using the default feature extraction (Inception v3 using the original weights from [2]), the input is + expected to be mini-batches of 3-channel RGB images of shape (3 x H x W) with dtype uint8. All images + will be resized to 299 x 299 which is the size of the original training data. The boolian flag ``real`` + determines if the images should update the statistics of the real distribution or the fake distribution. + + .. note:: using this metrics requires you to have ``scipy`` install. Either install as ``pip install + paddlemetrics[image]`` or ``pip install scipy`` + + .. note:: using this metric with the default feature extractor requires that ``torch-fidelity`` + is installed. Either install as ``pip install paddlemetrics[image]`` or + ``pip install torch-fidelity`` + + .. note:: the ``forward`` method can be used but ``compute_on_step`` is disabled by default (oppesit of + all other metrics) as this metric does not really make sense to calculate on a single batch. This + means that by default ``forward`` will just call ``update`` underneat. + + Args: + feature: + Either an integer or ``nn.Module``: + + - an integer will indicate the inceptionv3 feature layer to choose. Can be one of the following: + 64, 192, 768, 2048 + - an ``nn.Module`` for using a custom feature extractor. Expects that its forward method returns + an ``[N,d]`` matrix where ``N`` is the batch size and ``d`` is the feature size. + + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. 
When ``None``, DDP + will be used to perform the allgather + + References: + [1] Rethinking the Inception Architecture for Computer Vision + Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, Zbigniew Wojna + https://arxiv.org/abs/1512.00567 + + [2] GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium, + Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, Sepp Hochreiter + https://arxiv.org/abs/1706.08500 + + Raises: + ValueError: + If ``feature`` is set to an ``int`` (default settings) and ``torch-fidelity`` is not installed + ValueError: + If ``feature`` is set to an ``int`` not in [64, 192, 768, 2048] + TypeError: + If ``feature`` is not an ``str``, ``int`` or ``B.nn.Module`` + + Example: + >>> import torchapi as B + >>> _ = B.manual_seed(123) + >>> from paddlemetrics import FID + >>> fid = FID(feature=64) # doctest: +SKIP + >>> # generate two slightly overlapping image intensity distributions + >>> imgs_dist1 = B.randint(0, 200, (100, 3, 299, 299), dtype=B.uint8) # doctest: +SKIP + >>> imgs_dist2 = B.randint(100, 255, (100, 3, 299, 299), dtype=B.uint8) # doctest: +SKIP + >>> fid.update(imgs_dist1, real=True) # doctest: +SKIP + >>> fid.update(imgs_dist2, real=False) # doctest: +SKIP + >>> fid.compute() # doctest: +SKIP + tensor(12.7202) + + """ + real_features: List[Tensor] + fake_features: List[Tensor] + + def __init__( + self, + feature: Union[int, B.nn.Module] = 2048, + compute_on_step: bool = False, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable[[Tensor], List[Tensor]] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + rank_zero_warn( + "Metric `FID` will save all extracted features in buffer." + " For large datasets this may lead to large memory footprint.", + UserWarning, + ) + + if isinstance(feature, int): + if not _TORCH_FIDELITY_AVAILABLE: + raise ValueError( + "FID metric requires that Torch-fidelity is installed." + "Either install as `pip install paddlemetrics[image]` or `pip install torch-fidelity`" + ) + valid_int_input = [64, 192, 768, 2048] + if feature not in valid_int_input: + raise ValueError( + f"Integer input to argument `feature` must be one of {valid_int_input}, but got {feature}." + ) + + self.inception = NoTrainInceptionV3(name="inception-v3-compat", features_list=[str(feature)]) + elif isinstance(feature, B.nn.Module): + self.inception = feature + else: + raise TypeError("Got unknown input to argument `feature`") + + self.add_state("real_features", [], dist_reduce_fx=None) + self.add_state("fake_features", [], dist_reduce_fx=None) + + def update(self, imgs: Tensor, real: bool) -> None: # type: ignore + """Update the state with extracted features. 
+ + Args: + imgs: tensor with images feed to the feature extractor + real: bool indicating if imgs belong to the real or the fake distribution + """ + features = self.inception(imgs) + + if real: + self.real_features.append(features) + else: + self.fake_features.append(features) + + def compute(self) -> Tensor: + """Calculate FID score based on accumulated extracted features from the two distributions.""" + real_features = dim_zero_cat(self.real_features) + fake_features = dim_zero_cat(self.fake_features) + # computation is extremely sensitive so it needs to happen in double precision + orig_dtype = real_features.dtype + real_features = real_features.double() + fake_features = fake_features.double() + + # calculate mean and covariance + n = real_features.shape[0] + mean1 = real_features.mean(dim=0) + mean2 = fake_features.mean(dim=0) + diff1 = real_features - mean1 + diff2 = fake_features - mean2 + cov1 = 1.0 / (n - 1) * diff1.t().mm(diff1) + cov2 = 1.0 / (n - 1) * diff2.t().mm(diff2) + + # compute fid + return _compute_fid(mean1, cov1, mean2, cov2).to(orig_dtype) diff --git a/RE/paddlemetric/src/paddlemetrics/image/inception.py b/RE/paddlemetric/src/paddlemetrics/image/inception.py new file mode 100644 index 00000000..6c05b9a4 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/image/inception.py @@ -0,0 +1,179 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, List, Optional, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +#from paddlemetrics.image.fid import NoTrainInceptionV3 +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn +from paddlemetrics.utilities.data import dim_zero_cat +from paddlemetrics.utilities.imports import _TORCH_FIDELITY_AVAILABLE + + +class IS(Metric): + r""" + Calculates the Inception Score (IS) which is used to access how realistic generated images are. + It is defined as + + .. math:: + IS = exp(\mathbb{E}_x KL(p(y | x ) || p(y))) + + where :math:`KL(p(y | x) || p(y))` is the KL divergence between the conditional distribution :math:`p(y|x)` + and the margianl distribution :math:`p(y)`. Both the conditional and marginal distribution is calculated + from features extracted from the images. The score is calculated on random splits of the images such that + both a mean and standard deviation of the score are returned. The metric was originally proposed in [1]. + + Using the default feature extraction (Inception v3 using the original weights from [2]), the input is + expected to be mini-batches of 3-channel RGB images of shape (3 x H x W) with dtype uint8. All images + will be resized to 299 x 299 which is the size of the original training data. + + .. note:: using this metric with the default feature extractor requires that ``torch-fidelity`` + is installed. Either install as ``pip install paddlemetrics[image]`` or + ``pip install torch-fidelity`` + + .. 
note:: the ``forward`` method can be used but ``compute_on_step`` is disabled by default (oppesit of + all other metrics) as this metric does not really make sense to calculate on a single batch. This + means that by default ``forward`` will just call ``update`` underneat. + + Args: + feature: + Either an str, integer or ``nn.Module``: + + - an str or integer will indicate the inceptionv3 feature layer to choose. Can be one of the following: + 'logits_unbiased', 64, 192, 768, 2048 + - an ``nn.Module`` for using a custom feature extractor. Expects that its forward method returns + an ``[N,d]`` matrix where ``N`` is the batch size and ``d`` is the feature size. + + splits: integer determining how many splits the inception score calculation should be split among + + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather + + References: + [1] Improved Techniques for Training GANs + Tim Salimans, Ian Goodfellow, Wojciech Zaremba, Vicki Cheung, Alec Radford, Xi Chen + https://arxiv.org/abs/1606.03498 + + [2] GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium, + Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, Sepp Hochreiter + https://arxiv.org/abs/1706.08500 + + Raises: + ValueError: + If ``feature`` is set to an ``str`` or ``int`` and ``torch-fidelity`` is not installed + ValueError: + If ``feature`` is set to an ``str`` or ``int`` and not one of ['logits_unbiased', 64, 192, 768, 2048] + TypeError: + If ``feature`` is not an ``str``, ``int`` or ``B.nn.Module`` + + Example: + >>> import torchapi as B + >>> _ = B.manual_seed(123) + >>> from paddlemetrics import IS + >>> inception = IS() # doctest: +SKIP + >>> # generate some images + >>> imgs = B.randint(0, 255, (100, 3, 299, 299), dtype=B.uint8) # doctest: +SKIP + >>> inception.update(imgs) # doctest: +SKIP + >>> inception.compute() # doctest: +SKIP + (tensor(1.0569), tensor(0.0113)) + + """ + features: List + + def __init__( + self, + feature: Union[str, int, B.nn.Module] = "logits_unbiased", + splits: int = 10, + compute_on_step: bool = False, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable[[Tensor], List[Tensor]] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + rank_zero_warn( + "Metric `IS` will save all extracted features in buffer." + " For large datasets this may lead to large memory footprint.", + UserWarning, + ) + + if isinstance(feature, (str, int)): + if not _TORCH_FIDELITY_AVAILABLE: + raise ValueError( + "IS metric requires that Torch-fidelity is installed." + "Either install as `pip install paddlemetrics[image]`" + " or `pip install torch-fidelity`" + ) + valid_int_input = ("logits_unbiased", 64, 192, 768, 2048) + if feature not in valid_int_input: + raise ValueError( + f"Integer input to argument `feature` must be one of {valid_int_input}," f" but got {feature}." 
+ ) + + self.inception = NoTrainInceptionV3(name="inception-v3-compat", features_list=[str(feature)]) + elif isinstance(feature, B.nn.Module): + self.inception = feature + else: + raise TypeError("Got unknown input to argument `feature`") + + self.splits = splits + self.add_state("features", [], dist_reduce_fx=None) + + def update(self, imgs: Tensor) -> None: # type: ignore + """Update the state with extracted features. + + Args: + imgs: tensor with images feed to the feature extractor + """ + features = self.inception(imgs) + self.features.append(features) + + def compute(self) -> Tuple[Tensor, Tensor]: + features = dim_zero_cat(self.features) + # random permute the features + idx = B.randperm(features.shape[0]) + features = features[idx] + + # calculate probs and logits + prob = features.softmax(dim=1) + log_prob = features.log_softmax(dim=1) + + # split into groups + prob = prob.chunk(self.splits, dim=0) + log_prob = log_prob.chunk(self.splits, dim=0) + + # calculate score per split + mean_prob = [p.mean(dim=0, keepdim=True) for p in prob] + kl_ = [p * (log_p - m_p.log()) for p, log_p, m_p in zip(prob, log_prob, mean_prob)] + kl_ = [k.sum(dim=1).mean().exp() for k in kl_] + kl = B.stack(kl_) + + # return mean and std + return kl.mean(), kl.std() diff --git a/RE/paddlemetric/src/paddlemetrics/image/kid.py b/RE/paddlemetric/src/paddlemetrics/image/kid.py new file mode 100644 index 00000000..2f3d3a6b --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/image/kid.py @@ -0,0 +1,277 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
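+
+# ---------------------------------------------------------------------------------------
+# Editorial reference sketch (not used by this module): an unbiased polynomial-kernel MMD
+# between two equally sized [N, d] NumPy feature arrays, mirroring `poly_kernel`, `poly_mmd`
+# and `maximum_mean_discrepancy` defined below. The helper name and defaults are illustrative.
+# ---------------------------------------------------------------------------------------
+def _poly_mmd_numpy_sketch(f_real, f_fake, degree=3, gamma=None, coef=1.0):
+    """Return the squared MMD under a polynomial kernel (illustrative only)."""
+    import numpy as _np
+
+    if gamma is None:
+        gamma = 1.0 / f_real.shape[1]
+    k_xx = (gamma * f_real.dot(f_real.T) + coef) ** degree
+    k_yy = (gamma * f_fake.dot(f_fake.T) + coef) ** degree
+    k_xy = (gamma * f_real.dot(f_fake.T) + coef) ** degree
+    m = k_xx.shape[0]
+    # drop diagonal (self-similarity) terms so the within-set sums are unbiased, as in the module code
+    value = (k_xx.sum() - _np.trace(k_xx) + k_yy.sum() - _np.trace(k_yy)) / (m * (m - 1))
+    return value - 2 * k_xy.sum() / m ** 2
+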
+from typing import Any, Callable, List, Optional, Tuple, Union
+
+import paddleext.torchapi as B
+from paddleext.torchapi import Tensor
+from paddleext.torchapi.nn import Module
+
+from paddlemetrics.image.fid import NoTrainInceptionV3
+from paddlemetrics.metric import Metric
+from paddlemetrics.utilities import rank_zero_warn
+from paddlemetrics.utilities.data import dim_zero_cat
+from paddlemetrics.utilities.imports import _TORCH_FIDELITY_AVAILABLE
+
+
+def maximum_mean_discrepancy(k_xx: Tensor, k_xy: Tensor, k_yy: Tensor) -> Tensor:
+    """Adapted from `KID Score`_"""
+    m = k_xx.shape[0]
+
+    diag_x = B.diag(k_xx)
+    diag_y = B.diag(k_yy)
+
+    kt_xx_sums = k_xx.sum(dim=-1) - diag_x
+    kt_yy_sums = k_yy.sum(dim=-1) - diag_y
+    k_xy_sums = k_xy.sum(dim=0)
+
+    kt_xx_sum = kt_xx_sums.sum()
+    kt_yy_sum = kt_yy_sums.sum()
+    k_xy_sum = k_xy_sums.sum()
+
+    value = (kt_xx_sum + kt_yy_sum) / (m * (m - 1))
+    value -= 2 * k_xy_sum / (m ** 2)
+    return value
+
+
+def poly_kernel(f1: Tensor, f2: Tensor, degree: int = 3, gamma: Optional[float] = None, coef: float = 1.0) -> Tensor:
+    """Adapted from `KID Score`_"""
+    if gamma is None:
+        gamma = 1.0 / f1.shape[1]
+    kernel = (f1 @ f2.T * gamma + coef) ** degree
+    return kernel
+
+
+def poly_mmd(
+    f_real: Tensor, f_fake: Tensor, degree: int = 3, gamma: Optional[float] = None, coef: float = 1.0
+) -> Tensor:
+    """Adapted from `KID Score`_"""
+    k_11 = poly_kernel(f_real, f_real, degree, gamma, coef)
+    k_22 = poly_kernel(f_fake, f_fake, degree, gamma, coef)
+    k_12 = poly_kernel(f_real, f_fake, degree, gamma, coef)
+    return maximum_mean_discrepancy(k_11, k_12, k_22)
+
+
+class KID(Metric):
+    r"""
+    Calculates Kernel Inception Distance (KID) which is used to assess the quality of generated images. Given by
+
+    .. math::
+        KID = MMD(f_{real}, f_{fake})^2
+
+    where :math:`MMD` is the maximum mean discrepancy and :math:`f_{real}, f_{fake}` are extracted features
+    from real and fake images, see [1] for more details. In particular, calculating the MMD requires the
+    evaluation of a polynomial kernel function :math:`k`
+
+    .. math::
+        k(x,y) = (\gamma * x^T y + coef)^{degree}
+
+    which controls the distance between two features. In practice the MMD is calculated over a number of
+    subsets so that both the mean and standard deviation of KID can be reported.
+
+    Using the default feature extraction (Inception v3 using the original weights from [2]), the input is
+    expected to be mini-batches of 3-channel RGB images of shape (3 x H x W) with dtype uint8. All images
+    will be resized to 299 x 299 which is the size of the original training data.
+
+    .. note:: using this metric with the default feature extractor requires that ``torch-fidelity``
+        is installed. Either install as ``pip install paddlemetrics[image]`` or
+        ``pip install torch-fidelity``
+
+    .. note:: the ``forward`` method can be used but ``compute_on_step`` is disabled by default (opposite of
+        all other metrics) as this metric does not really make sense to calculate on a single batch. This
+        means that by default ``forward`` will just call ``update`` underneath.
+
+    Args:
+        feature:
+            Either a str, an integer or an ``nn.Module``:
+
+            - a str or integer will indicate the inceptionv3 feature layer to choose. Can be one of the following:
+              'logits_unbiased', 64, 192, 768, 2048
+            - an ``nn.Module`` for using a custom feature extractor. Expects that its forward method returns
+              an ``[N,d]`` matrix where ``N`` is the batch size and ``d`` is the feature size.
+
+        subsets:
+            Number of subsets to calculate the mean and standard deviation scores over
+        subset_size:
+            Number of randomly picked samples in each subset
+        degree:
+            Degree of the polynomial kernel function
+        gamma:
+            Scale-length of polynomial kernel. If set to ``None``, it will be automatically set to the feature size
+        coef:
+            Bias term in the polynomial kernel.
+        compute_on_step:
+            Forward only calls ``update()`` and returns ``None`` if this is set to ``False``.
+        dist_sync_on_step:
+            Synchronize metric state across processes at each ``forward()``
+            before returning the value at the step
+        process_group:
+            Specify the process group on which synchronization is called.
+            default: ``None`` (which selects the entire world)
+        dist_sync_fn:
+            Callback that performs the allgather operation on the metric state. When ``None``, DDP
+            will be used to perform the allgather
+
+    References:
+        [1] Demystifying MMD GANs
+        Mikołaj Bińkowski, Danica J. Sutherland, Michael Arbel, Arthur Gretton
+        https://arxiv.org/abs/1801.01401
+
+        [2] GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium,
+        Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, Sepp Hochreiter
+        https://arxiv.org/abs/1706.08500
+
+    Raises:
+        RuntimeError:
+            If ``feature`` is set to a ``str`` or ``int`` (default settings) and ``torch-fidelity`` is not installed
+        ValueError:
+            If ``feature`` is set to an ``int`` not in [64, 192, 768, 2048]
+        ValueError:
+            If ``subsets`` is not an integer larger than 0
+        ValueError:
+            If ``subset_size`` is not an integer larger than 0
+        ValueError:
+            If ``degree`` is not an integer larger than 0
+        ValueError:
+            If ``gamma`` is neither ``None`` nor a float larger than 0
+        ValueError:
+            If ``coef`` is not a float larger than 0
+
+    Example:
+        >>> import torchapi as B
+        >>> _ = B.manual_seed(123)
+        >>> from paddlemetrics import KID
+        >>> kid = KID(subset_size=50)  # doctest: +SKIP
+        >>> # generate two slightly overlapping image intensity distributions
+        >>> imgs_dist1 = B.randint(0, 200, (100, 3, 299, 299), dtype=B.uint8)  # doctest: +SKIP
+        >>> imgs_dist2 = B.randint(100, 255, (100, 3, 299, 299), dtype=B.uint8)  # doctest: +SKIP
+        >>> kid.update(imgs_dist1, real=True)  # doctest: +SKIP
+        >>> kid.update(imgs_dist2, real=False)  # doctest: +SKIP
+        >>> kid_mean, kid_std = kid.compute()  # doctest: +SKIP
+        >>> print((kid_mean, kid_std))  # doctest: +SKIP
+        (tensor(0.0338), tensor(0.0025))
+
+    """
+    real_features: List[Tensor]
+    fake_features: List[Tensor]
+
+    def __init__(
+        self,
+        feature: Union[str, int, B.nn.Module] = 2048,
+        subsets: int = 100,
+        subset_size: int = 1000,
+        degree: int = 3,
+        gamma: Optional[float] = None,  # type: ignore
+        coef: float = 1.0,
+        compute_on_step: bool = False,
+        dist_sync_on_step: bool = False,
+        process_group: Optional[Any] = None,
+        dist_sync_fn: Callable = None,
+    ) -> None:
+        super().__init__(
+            compute_on_step=compute_on_step,
+            dist_sync_on_step=dist_sync_on_step,
+            process_group=process_group,
+            dist_sync_fn=dist_sync_fn,
+        )
+
+        rank_zero_warn(
+            "Metric `KID` will save all extracted features in buffer."
+            " For large datasets this may lead to large memory footprint.",
+            UserWarning,
+        )
+
+        if isinstance(feature, (str, int)):
+            if not _TORCH_FIDELITY_AVAILABLE:
+                raise RuntimeError(
+                    "KID metric requires that Torch-fidelity is installed."
+ " Either install as `pip install paddlemetrics[image]`" + " or `pip install torch-fidelity`" + ) + valid_int_input = ("logits_unbiased", 64, 192, 768, 2048) + if feature not in valid_int_input: + raise ValueError( + f"Integer input to argument `feature` must be one of {valid_int_input}," f" but got {feature}." + ) + + self.inception: Module = NoTrainInceptionV3(name="inception-v3-compat", features_list=[str(feature)]) + elif isinstance(feature, Module): + self.inception = feature + else: + raise TypeError("Got unknown input to argument `feature`") + + if not (isinstance(subsets, int) and subsets > 0): + raise ValueError("Argument `subsets` expected to be integer larger than 0") + self.subsets = subsets + + if not (isinstance(subset_size, int) and subset_size > 0): + raise ValueError("Argument `subset_size` expected to be integer larger than 0") + self.subset_size = subset_size + + if not (isinstance(degree, int) and degree > 0): + raise ValueError("Argument `degree` expected to be integer larger than 0") + self.degree = degree + + if gamma is not None and not (isinstance(gamma, float) and gamma > 0): + raise ValueError("Argument `gamma` expected to be `None` or float larger than 0") + self.gamma = gamma + + if not (isinstance(coef, float) and coef > 0): + raise ValueError("Argument `coef` expected to be float larger than 0") + self.coef = coef + + # states for extracted features + self.add_state("real_features", [], dist_reduce_fx=None) + self.add_state("fake_features", [], dist_reduce_fx=None) + + def update(self, imgs: Tensor, real: bool) -> None: # type: ignore + """Update the state with extracted features. + + Args: + imgs: tensor with images feed to the feature extractor + real: bool indicating if imgs belong to the real or the fake distribution + """ + features = self.inception(imgs) + + if real: + self.real_features.append(features) + else: + self.fake_features.append(features) + + def compute(self) -> Tuple[Tensor, Tensor]: + """Calculate KID score based on accumulated extracted features from the two distributions. Returns a tuple + of mean and standard deviation of KID scores calculated on subsets of extracted features. + + Implementation inspired by `Fid Score`_ + """ + real_features = dim_zero_cat(self.real_features) + fake_features = dim_zero_cat(self.fake_features) + + n_samples_real = real_features.shape[0] + if n_samples_real < self.subset_size: + raise ValueError("Argument `subset_size` should be smaller than the number of samples") + n_samples_fake = fake_features.shape[0] + if n_samples_fake < self.subset_size: + raise ValueError("Argument `subset_size` should be smaller than the number of samples") + + kid_scores_ = [] + for _ in range(self.subsets): + perm = B.randperm(n_samples_real) + f_real = real_features[perm[: self.subset_size]] + perm = B.randperm(n_samples_fake) + f_fake = fake_features[perm[: self.subset_size]] + + o = poly_mmd(f_real, f_fake, self.degree, self.gamma, self.coef) + kid_scores_.append(o) + kid_scores = B.stack(kid_scores_) + return kid_scores.mean(), kid_scores.std(unbiased=False) diff --git a/RE/paddlemetric/src/paddlemetrics/image/lpip_similarity.py b/RE/paddlemetric/src/paddlemetrics/image/lpip_similarity.py new file mode 100644 index 00000000..7cf6d03a --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/image/lpip_similarity.py @@ -0,0 +1,156 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Callable, List, Optional
+
+import paddleext.torchapi as B
+from paddleext.torchapi import Tensor
+
+from paddlemetrics.metric import Metric
+from paddlemetrics.utilities.imports import _LPIPS_AVAILABLE
+
+if _LPIPS_AVAILABLE:
+    from lpips import LPIPS as Lpips_backbone
+else:
+
+    class Lpips_backbone(B.nn.Module):  # type: ignore
+        pass
+
+
+class NoTrainLpips(Lpips_backbone):
+    def train(self, mode: bool) -> "NoTrainLpips":
+        """The network should not be able to be switched away from evaluation mode."""
+        return super().train(False)
+
+
+def _valid_img(img: Tensor) -> bool:
+    """Check that input is a valid image to the network."""
+    return img.ndim == 4 and img.shape[1] == 3 and img.min() >= -1.0 and img.max() <= 1.0
+
+
+class LPIPS(Metric):
+    """The Learned Perceptual Image Patch Similarity (`LPIPS_`) is used to judge the perceptual similarity between
+    two images. LPIPS essentially computes the similarity between the activations of two image patches for some
+    pre-defined network. This measure has been shown to match human perception well. A low LPIPS score means that
+    image patches are perceptually similar.
+
+    Both input image patches are expected to have shape `[N, 3, H, W]` and be normalized to the [-1,1]
+    range. The minimum size of `H, W` depends on the chosen backbone (see `net_type` arg).
+
+    .. note:: using this metric requires you to have the ``lpips`` package installed. Either install
+        as ``pip install paddlemetrics[image]`` or ``pip install lpips``
+
+    .. note:: this metric is not scriptable when using ``torch<1.8``. Please update your pytorch installation
+        if this is an issue.
+
+    Args:
+        net_type: str indicating backbone network type to use. Choose between `'alex'`, `'vgg'` or `'squeeze'`
+        reduction: str indicating how to reduce over the batch dimension. Choose between `'sum'` or `'mean'`.
+        compute_on_step:
+            Forward only calls ``update()`` and returns ``None`` if this is set to ``False``.
+        dist_sync_on_step:
+            Synchronize metric state across processes at each ``forward()``
+            before returning the value at the step
+        process_group:
+            Specify the process group on which synchronization is called.
+            default: ``None`` (which selects the entire world)
+        dist_sync_fn:
+            Callback that performs the allgather operation on the metric state.
When ``None``, DDP + will be used to perform the allgather + + Raises: + ValueError: + If ``lpips`` package is not installed + ValueError: + If ``net_type`` is not one of ``"vgg"``, ``"alex"`` or ``"squeeze"`` + ValueError: + If ``reduction`` is not one of ``"mean"`` or ``"sum"`` + + Example: + >>> import torchapi as B + >>> _ = B.manual_seed(123) + >>> from paddlemetrics import LPIPS + >>> lpips = LPIPS(net_type='vgg') + >>> img1 = B.rand(10, 3, 100, 100) + >>> img2 = B.rand(10, 3, 100, 100) + >>> lpips(img1, img2) + tensor([0.3566], grad_fn=) + """ + + is_differentiable = True + real_features: List[Tensor] + fake_features: List[Tensor] + + # due to the use of named tuple in the backbone the net variable cannot be scriptet + __jit_ignored_attributes__ = ["net"] + + def __init__( + self, + net_type: str = "alex", + reduction: str = "mean", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable[[Tensor], List[Tensor]] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + if not _LPIPS_AVAILABLE: + raise ValueError( + "LPIPS metric requires that lpips is installed." + "Either install as `pip install paddlemetrics[image]` or `pip install lpips`" + ) + + valid_net_type = ("vgg", "alex", "squeeze") + if net_type not in valid_net_type: + raise ValueError(f"Argument `net_type` must be one of {valid_net_type}, but got {net_type}.") + self.net = NoTrainLpips(net=net_type, verbose=False) + + valid_reduction = ("mean", "sum") + if reduction not in valid_reduction: + raise ValueError(f"Argument `reduction` must be one of {valid_reduction}, but got {reduction}") + self.reduction = reduction + + self.add_state("sum_scores", B.zeros(1), dist_reduce_fx="sum") + self.add_state("total", B.zeros(1), dist_reduce_fx="sum") + + def update(self, img1: Tensor, img2: Tensor) -> None: # type: ignore + """Update internal states with lpips score. + + Args: + img1: tensor with images of shape [N, 3, H, W] + img2: tensor with images of shape [N, 3, H, W] + """ + if not (_valid_img(img1) and _valid_img(img2)): + raise ValueError( + "Expected both input arguments to be normalized tensors (all values in range [-1,1])" + f" and to have shape [N, 3, H, W] but `img1` have shape {img1.shape} with values in" + f" range {[img1.min(), img1.max()]} and `img2` have shape {img2.shape} with value" + f" in range {[img2.min(), img2.max()]}" + ) + + loss = self.net(img1, img2).squeeze() + self.sum_scores += loss.sum() + self.total += img1.shape[0] + + def compute(self) -> Tensor: + """Compute final perceptual similarity metric.""" + if self.reduction == "mean": + return self.sum_scores / self.total + if self.reduction == "sum": + return self.sum_scores diff --git a/RE/paddlemetric/src/paddlemetrics/image/psnr.py b/RE/paddlemetric/src/paddlemetrics/image/psnr.py new file mode 100644 index 00000000..3226203d --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/image/psnr.py @@ -0,0 +1,147 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Optional, Sequence, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.image.psnr import _psnr_compute, _psnr_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn + + +class PSNR(Metric): + r""" + Computes `Computes Peak Signal-to-Noise Ratio`_ (PSNR): + + .. math:: \text{PSNR}(I, J) = 10 * \log_{10} \left(\frac{\max(I)^2}{\text{MSE}(I, J)}\right) + + Where :math:`\text{MSE}` denotes the `mean-squared-error`_ function. + + Args: + data_range: + the range of the data. If None, it is determined from the data (max - min). + The ``data_range`` must be given when ``dim`` is not None. + base: a base of a logarithm to use (default: 10) + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + + dim: + Dimensions to reduce PSNR scores over, provided as either an integer or a list of integers. Default is + None meaning scores will be reduced across all dimensions and all batches. + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Raises: + ValueError: + If ``dim`` is not ``None`` and ``data_range`` is not given. + + Example: + >>> from paddlemetrics import PSNR + >>> psnr = PSNR() + >>> preds = B.tensor([[0.0, 1.0], [2.0, 3.0]]) + >>> target = B.tensor([[3.0, 2.0], [1.0, 0.0]]) + >>> psnr(preds, target) + tensor(2.5527) + + .. note:: + Half precision is only support on GPU for this metric + + """ + min_target: Tensor + max_target: Tensor + + def __init__( + self, + data_range: Optional[float] = None, + base: float = 10.0, + reduction: str = "elementwise_mean", + dim: Optional[Union[int, Tuple[int, ...]]] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + + if dim is None and reduction != "elementwise_mean": + rank_zero_warn(f"The `reduction={reduction}` will not have any effect when `dim` is None.") + + if dim is None: + self.add_state("sum_squared_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + else: + self.add_state("sum_squared_error", default=[]) + self.add_state("total", default=[]) + + if data_range is None: + if dim is not None: + # Maybe we could use `B.amax(target, dim=dim) - B.amin(target, dim=dim)` in PyTorch 1.7 to + # calculate `data_range` in the future. 
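+                # A per-dimension reduction would need a running min/max per reduced slice,
+                # which is not tracked here; the global min/max fallback below only covers
+                # the ``dim is None`` case, so ``data_range`` must be supplied explicitly.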
+ raise ValueError("The `data_range` must be given when `dim` is not None.") + + self.data_range = None + self.add_state("min_target", default=tensor(0.0), dist_reduce_fx=B.min) + self.add_state("max_target", default=tensor(0.0), dist_reduce_fx=B.max) + else: + self.add_state("data_range", default=tensor(float(data_range)), dist_reduce_fx="mean") + self.base = base + self.reduction = reduction + self.dim = tuple(dim) if isinstance(dim, Sequence) else dim + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + sum_squared_error, n_obs = _psnr_update(preds, target, dim=self.dim) + if self.dim is None: + if self.data_range is None: + # keep track of min and max target values + self.min_target = min(target.min(), self.min_target) + self.max_target = max(target.max(), self.max_target) + + self.sum_squared_error += sum_squared_error + self.total += n_obs + else: + self.sum_squared_error.append(sum_squared_error) + self.total.append(n_obs) + + def compute(self) -> Tensor: + """Compute peak signal-to-noise ratio over state.""" + if self.data_range is not None: + data_range = self.data_range + else: + data_range = self.max_target - self.min_target + + if self.dim is None: + sum_squared_error = self.sum_squared_error + total = self.total + else: + sum_squared_error = B.cat([values.flatten() for values in self.sum_squared_error]) + total = B.cat([values.flatten() for values in self.total]) + return _psnr_compute(sum_squared_error, total, data_range, base=self.base, reduction=self.reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/image/ssim.py b/RE/paddlemetric/src/paddlemetrics/image/ssim.py new file mode 100644 index 00000000..f34a19b1 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/image/ssim.py @@ -0,0 +1,105 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, List, Optional, Sequence + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.image.ssim import _ssim_compute, _ssim_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn +from paddlemetrics.utilities.data import dim_zero_cat + + +class SSIM(Metric): + """Computes Structual Similarity Index Measure (SSIM_). + + Args: + kernel_size: size of the gaussian kernel (default: (11, 11)) + sigma: Standard deviation of the gaussian kernel (default: (1.5, 1.5)) + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + + data_range: Range of the image. If ``None``, it is determined from the image (max - min) + k1: Parameter of SSIM. Default: 0.01 + k2: Parameter of SSIM. 
Default: 0.03 + + Return: + Tensor with SSIM score + + Example: + >>> from paddlemetrics import SSIM + >>> preds = B.rand([16, 1, 16, 16]) + >>> target = preds * 0.75 + >>> ssim = SSIM() + >>> ssim(preds, target) + tensor(0.9219) + """ + + preds: List[Tensor] + target: List[Tensor] + + def __init__( + self, + kernel_size: Sequence[int] = (11, 11), + sigma: Sequence[float] = (1.5, 1.5), + reduction: str = "elementwise_mean", + data_range: Optional[float] = None, + k1: float = 0.01, + k2: float = 0.03, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + rank_zero_warn( + "Metric `SSIM` will save all targets and" + " predictions in buffer. For large datasets this may lead" + " to large memory footprint." + ) + + self.add_state("preds", default=[], dist_reduce_fx="cat") + self.add_state("target", default=[], dist_reduce_fx="cat") + self.kernel_size = kernel_size + self.sigma = sigma + self.data_range = data_range + self.k1 = k1 + self.k2 = k2 + self.reduction = reduction + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + preds, target = _ssim_update(preds, target) + self.preds.append(preds) + self.target.append(target) + + def compute(self) -> Tensor: + """Computes explained variance over state.""" + preds = dim_zero_cat(self.preds) + target = dim_zero_cat(self.target) + return _ssim_compute( + preds, target, self.kernel_size, self.sigma, self.reduction, self.data_range, self.k1, self.k2 + ) diff --git a/RE/paddlemetric/src/paddlemetrics/metric.py b/RE/paddlemetric/src/paddlemetrics/metric.py new file mode 100644 index 00000000..21c2148b --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/metric.py @@ -0,0 +1,775 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
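The ``Metric`` base class defined in this file is what all of the metrics above build on: states are registered with ``add_state()``, accumulated in ``update()`` and reduced in ``compute()``, while ``forward()`` additionally reports a batch-local value. A minimal, illustrative subclass (hypothetical ``MeanAbsoluteDiff`` name; assumes the package is importable as laid out in this repo, not part of this patch) could look like:

```python
# Illustrative sketch only, not part of this patch.
import paddleext.torchapi as B
from paddleext.torchapi import Tensor, tensor
from paddlemetrics.metric import Metric

class MeanAbsoluteDiff(Metric):
    def __init__(self) -> None:
        super().__init__(compute_on_step=True)
        # "sum" states are added together across processes when the metric is synced
        self.add_state("abs_error", default=tensor(0.0), dist_reduce_fx="sum")
        self.add_state("n_samples", default=tensor(0.0), dist_reduce_fx="sum")

    def update(self, preds: Tensor, target: Tensor) -> None:
        # accumulate batch statistics; forward() calls this automatically
        self.abs_error += (preds - target).abs().sum()
        self.n_samples += tensor(float(preds.shape[0]))

    def compute(self) -> Tensor:
        # reduce the accumulated state to the final value
        return self.abs_error / self.n_samples

metric = MeanAbsoluteDiff()
metric.update(B.tensor([1.0, 2.0]), B.tensor([1.5, 2.5]))
print(metric.compute())  # 0.5 for this toy input
```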
+import functools +import inspect +import operator as op +from abc import ABC, abstractmethod +from collections.abc import Sequence +from contextlib import contextmanager +from copy import deepcopy +from typing import Any, Callable, Dict, Generator, List, Optional, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, Module + +from paddlemetrics.utilities import apply_to_collection, rank_zero_warn +from paddlemetrics.utilities.data import _flatten, dim_zero_cat, dim_zero_max, dim_zero_mean, dim_zero_min, dim_zero_sum +from paddlemetrics.utilities.distributed import gather_all_tensors +from paddlemetrics.utilities.exceptions import paddlemetricsUserError +from paddlemetrics.utilities.imports import _LIGHTNING_AVAILABLE, _compare_version + + +def jit_distributed_available() -> bool: + return B.distributed.is_available() and B.distributed.is_initialized() + + +class Metric(Module): + """Base class for all metrics present in the Metrics API. + + Implements ``add_state()``, ``forward()``, ``reset()`` and a few other things to + handle distributed synchronization and per-step metric computation. + + Override ``update()`` and ``compute()`` functions to implement your own metric. Use + ``add_state()`` to register metric state variables which keep track of state on each + call of ``update()`` and are synchronized across processes when ``compute()`` is called. + + Note: + Metric state variables can either be ``B.Tensors`` or an empty list which can we used + to store `B.Tensors``. + + Note: + Different metrics only override ``update()`` and not ``forward()``. A call to ``update()`` + is valid, but it won't return the metric value at the current step. A call to ``forward()`` + automatically calls ``update()`` and also returns the metric value at the current step. + + Args: + compute_on_step: + Forward only calls ``update()`` and returns None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. 
+ """ + + __jit_ignored_attributes__ = ["device"] + __jit_unused_properties__ = ["is_differentiable"] + is_differentiable: Optional[bool] = None + higher_is_better: Optional[bool] = None + + def __init__( + self, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__() + + # see (https://github.com/pytorch/pytorch/blob/3e6bb5233f9ca2c5aa55d9cda22a7ee85439aa6e/ + # B.nn/modules/module.py#L227) +# B._C._log_api_usage_once(f"paddlemetrics.metric.{self.__class__.__name__}") + +# self._LIGHTNING_GREATER_EQUAL_1_3 = _compare_version("pytorch_lightning", op.ge, "1.3.0") + self._device = B.device("cpu") + + self.dist_sync_on_step = dist_sync_on_step + self.compute_on_step = compute_on_step + self.process_group = process_group + self.dist_sync_fn = dist_sync_fn + self._to_sync = True + self._should_unsync = True + + self._update_signature = inspect.signature(self.update) + self.update: Callable = self._wrap_update(self.update) # type: ignore + self.compute: Callable = self._wrap_compute(self.compute) # type: ignore + self._computed = None + self._forward_cache = None + self._update_called = False + + # initialize state + self._defaults: Dict[str, Union[List, Tensor]] = {} + self._persistent: Dict[str, bool] = {} + self._reductions: Dict[str, Union[str, Callable[[Union[List[Tensor], Tensor]], Tensor], None]] = {} + + # state management + self._is_synced = False + self._cache: Optional[Dict[str, Union[List[Tensor], Tensor]]] = None + + def to(self, *args, **kwargs): + + return self + # result = super().to(*args, **kwargs) + # + # return result if result is not None else self + + def add_state( + self, + name: str, + default: Union[list, Tensor], + dist_reduce_fx: Optional[Union[str, Callable]] = None, + persistent: bool = False, + ) -> None: + """Adds metric state variable. Only used by subclasses. + + Args: + name: The name of the state variable. The variable will then be accessible at ``self.name``. + default: Default value of the state; can either be a ``B.Tensor`` or an empty list. The state will be + reset to this value when ``self.reset()`` is called. + dist_reduce_fx (Optional): Function to reduce state across multiple processes in distributed mode. + If value is ``"sum"``, ``"mean"``, ``"cat"``, ``"min"`` or ``"max"`` we will use ``B.sum``, + ``B.mean``, ``B.cat``, ``B.min`` and ``B.max``` respectively, each with argument + ``dim=0``. Note that the ``"cat"`` reduction only makes sense if the state is a list, and not + a tensor. The user can also pass a custom function in this parameter. + persistent (Optional): whether the state will be saved as part of the modules ``state_dict``. + Default is ``False``. + + Note: + Setting ``dist_reduce_fx`` to None will return the metric state synchronized across different processes. + However, there won't be any reduction function applied to the synchronized metric state. + + The metric states would be synced as follows + + - If the metric state is ``B.Tensor``, the synced value will be a stacked ``B.Tensor`` across + the process dimension if the metric state was a ``B.Tensor``. The original ``B.Tensor`` metric + state retains dimension and hence the synchronized output will be of shape ``(num_process, ...)``. + + - If the metric state is a ``list``, the synced value will be a ``list`` containing the + combined elements from all processes. 
+ + Note: + When passing a custom function to ``dist_reduce_fx``, expect the synchronized metric state to follow + the format discussed in the above note. + + Raises: + ValueError: + If ``default`` is not a ``tensor`` or an ``empty list``. + ValueError: + If ``dist_reduce_fx`` is not callable or one of ``"mean"``, ``"sum"``, ``"cat"``, ``None``. + """ + if not isinstance(default, (Tensor, list)) or (isinstance(default, list) and default): + raise ValueError(f"state variable must be a tensor or any empty list (where you can append tensors): {type(default)}") + + if dist_reduce_fx == "sum": + dist_reduce_fx = dim_zero_sum + elif dist_reduce_fx == "mean": + dist_reduce_fx = dim_zero_mean + elif dist_reduce_fx == "max": + dist_reduce_fx = dim_zero_max + elif dist_reduce_fx == "min": + dist_reduce_fx = dim_zero_min + elif dist_reduce_fx == "cat": + dist_reduce_fx = dim_zero_cat + elif dist_reduce_fx is not None and not callable(dist_reduce_fx): + raise ValueError("`dist_reduce_fx` must be callable or one of ['mean', 'sum', 'cat', None]") + + if isinstance(default, Tensor): + default = default.contiguous() + + setattr(self, name, default) + + self._defaults[name] = deepcopy(default) + self._persistent[name] = persistent + self._reductions[name] = dist_reduce_fx + +# @B.jit.unused + def forward(self, *args: Any, **kwargs: Any) -> Any: + """Automatically calls ``update()``. + + Returns the metric value over inputs if ``compute_on_step`` is True. + """ + # add current step + if self._is_synced: + raise paddlemetricsUserError( + "The Metric shouldn't be synced when performing ``update``. " + "HINT: Did you forget to call ``unsync`` ?." + ) + + with B.no_grad(): + self.update(*args, **kwargs) + + if self.compute_on_step: + self._to_sync = self.dist_sync_on_step + # skip restore cache operation from compute as cache is stored below. 
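+            # forward() reports the batch-local value by snapshotting the accumulated
+            # state, resetting, running update()/compute() on this batch alone, and then
+            # restoring the snapshot so that global accumulation continues unchanged.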
+ self._should_unsync = False + + # save context before switch + cache = {attr: getattr(self, attr) for attr in self._defaults} + + # call reset, update, compute, on single batch + self.reset() + self.update(*args, **kwargs) + self._forward_cache = self.compute() + + # restore context + for attr, val in cache.items(): + setattr(self, attr, val) + self._is_synced = False + + self._should_unsync = True + self._to_sync = True + self._computed = None + + return self._forward_cache + + def _sync_dist(self, dist_sync_fn: Callable = gather_all_tensors, process_group: Optional[Any] = None) -> None: + input_dict = {attr: getattr(self, attr) for attr in self._reductions} + + for attr, reduction_fn in self._reductions.items(): + # pre-concatenate metric states that are lists to reduce number of all_gather operations + if reduction_fn == dim_zero_cat and isinstance(input_dict[attr], list) and len(input_dict[attr]) > 1: + input_dict[attr] = [dim_zero_cat(input_dict[attr])] + + output_dict = apply_to_collection( + input_dict, + Tensor, + dist_sync_fn, + group=process_group or self.process_group, + ) + + for attr, reduction_fn in self._reductions.items(): + # pre-processing ops (stack or flatten for inputs) + if isinstance(output_dict[attr][0], Tensor): + output_dict[attr] = B.stack(output_dict[attr]) + elif isinstance(output_dict[attr][0], list): + output_dict[attr] = _flatten(output_dict[attr]) + + if not (callable(reduction_fn) or reduction_fn is None): + raise TypeError("reduction_fn must be callable or None") + reduced = reduction_fn(output_dict[attr]) if reduction_fn is not None else output_dict[attr] + setattr(self, attr, reduced) + + def _wrap_update(self, update: Callable) -> Callable: + @functools.wraps(update) + def wrapped_func(*args: Any, **kwargs: Any) -> Optional[Any]: + self._computed = None + self._update_called = True + return update(*args, **kwargs) + + return wrapped_func + + def sync( + self, + dist_sync_fn: Optional[Callable] = None, + process_group: Optional[Any] = None, + should_sync: bool = True, + distributed_available: Optional[Callable] = jit_distributed_available, + ) -> None: + """Sync function for manually controlling when metrics states should be synced across processes. + + Args: + dist_sync_fn: Function to be used to perform states synchronization + process_group: + Specify the process group on which synchronization is called. + default: None (which selects the entire world) + should_sync: Whether to apply to state synchronization. This will have an impact + only when running in a distributed setting. + distributed_available: Function to determine if we are running inside a distributed setting + """ + if self._is_synced and should_sync: + raise paddlemetricsUserError("The Metric has already been synced.") + + is_distributed = distributed_available() if callable(distributed_available) else None + + if not should_sync or not is_distributed: + return + + if dist_sync_fn is None: + dist_sync_fn = gather_all_tensors + + # cache prior to syncing + self._cache = {attr: getattr(self, attr) for attr in self._defaults} + + # sync + self._sync_dist(dist_sync_fn, process_group=process_group) + self._is_synced = True + + def unsync(self, should_unsync: bool = True) -> None: + """Unsync function for manually controlling when metrics states should be reverted back to their local + states. 
+ + Args: + should_unsync: Whether to perform unsync + """ + if not should_unsync: + return + + if not self._is_synced: + raise paddlemetricsUserError("The Metric has already been un-synced.") + + if self._cache is None: + raise paddlemetricsUserError("The internal cache should exist to unsync the Metric.") + + # if we synced, restore to cache so that we can continue to accumulate un-synced state + for attr, val in self._cache.items(): + setattr(self, attr, val) + self._is_synced = False + self._cache = None + + @contextmanager + def sync_context( + self, + dist_sync_fn: Optional[Callable] = None, + process_group: Optional[Any] = None, + should_sync: bool = True, + should_unsync: bool = True, + distributed_available: Optional[Callable] = jit_distributed_available, + ) -> Generator: + """Context manager to synchronize the states between processes when running in a distributed setting and + restore the local cache states after yielding. + + Args: + dist_sync_fn: Function to be used to perform states synchronization + process_group: + Specify the process group on which synchronization is called. + default: None (which selects the entire world) + should_sync: Whether to apply to state synchronization. This will have an impact + only when running in a distributed setting. + should_unsync: Whether to restore the cache state so that the metrics can + continue to be accumulated. + distributed_available: Function to determine if we are running inside a distributed setting + """ + self.sync( + dist_sync_fn=dist_sync_fn, + process_group=process_group, + should_sync=should_sync, + distributed_available=distributed_available, + ) + + yield + + self.unsync(should_unsync=self._is_synced and should_unsync) + + def _wrap_compute(self, compute: Callable) -> Callable: + @functools.wraps(compute) + def wrapped_func(*args: Any, **kwargs: Any) -> Any: + if not self._update_called: + rank_zero_warn( + f"The ``compute`` method of metric {self.__class__.__name__}" + " was called before the ``update`` method which may lead to errors," + " as metric states have not yet been updated.", + UserWarning, + ) + + # return cached value + if self._computed is not None: + return self._computed + + # compute relies on the sync context manager to gather the states across processes and apply reduction + # if synchronization happened, the current rank accumulated states will be restored to keep + # accumulation going if ``should_unsync=True``, + with self.sync_context( + dist_sync_fn=self.dist_sync_fn, should_sync=self._to_sync, should_unsync=self._should_unsync + ): + self._computed = compute(*args, **kwargs) + + return self._computed + + return wrapped_func + + @abstractmethod + def update(self, *_: Any, **__: Any) -> None: + """Override this method to update the state variables of your metric class.""" + + @abstractmethod + def compute(self) -> Any: + """Override this method to compute the final metric value from state variables synchronized across the + distributed backend.""" + + def reset(self) -> None: + """This method automatically resets the metric state variables to their default value.""" + self._update_called = False + self._forward_cache = None + # lower lightning versions requires this implicitly to log metric objects correctly in self.log +# if not _LIGHTNING_AVAILABLE or self._LIGHTNING_GREATER_EQUAL_1_3: + self._computed = None + + for attr, default in self._defaults.items(): + current_val = getattr(self, attr) + if isinstance(default, Tensor): + setattr(self, attr, 
default.detach().clone().to(current_val.device)) + else: + setattr(self, attr, []) + + # reset internal states + self._cache = None + self._is_synced = False + + def clone(self) -> "Metric": + """Make a copy of the metric.""" + return deepcopy(self) + + def __getstate__(self) -> Dict[str, Any]: + # ignore update and compute functions for pickling + return {k: v for k, v in self.__dict__.items() if k not in ["update", "compute", "_update_signature"]} + + def __setstate__(self, state: Dict[str, Any]) -> None: + # manually restore update and compute functions for pickling + self.__dict__.update(state) + self._update_signature = inspect.signature(self.update) + self.update: Callable = self._wrap_update(self.update) # type: ignore + self.compute: Callable = self._wrap_compute(self.compute) # type: ignore + + def __setattr__(self, name: str, value: Any) -> None: + if name in ("higher_is_better", "is_differentiable"): + raise RuntimeError(f"Can't change const `{name}`.") + super().__setattr__(name, value) + + @property + def device(self) -> "B.device": + """Return the device of the metric.""" + return self._device + + def type(self, dst_type: Union[str, B.dtype]) -> "Metric": + """Method override default and prevent dtype casting. + + Please use `metric.set_dtype(dtype)` instead. + """ + return self + + def float(self) -> "Metric": + """Method override default and prevent dtype casting. + + Please use `metric.set_dtype(dtype)` instead. + """ + return self + + def double(self) -> "Metric": + """Method override default and prevent dtype casting. + + Please use `metric.set_dtype(dtype)` instead. + """ + return self + + def half(self) -> "Metric": + """Method override default and prevent dtype casting. + + Please use `metric.set_dtype(dtype)` instead. + """ + return self + + def set_dtype(self, dst_type: Union[str, B.dtype]) -> None: + """Special version of `type` for transferring all metric states to specific dtype + Arguments: + dst_type (type or string): the desired type + """ + return super().type(dst_type) + + def _apply(self, fn: Callable, *args, **kwargs) -> Module: + """Overwrite _apply function such that we can also move metric states to the correct device when `.to`, + `.cuda`, etc methods are called.""" + this = super()._apply(fn, *args, **kwargs) + if this is None: # for paddle + this = self + # Also apply fn to metric states and defaults + for key, value in this._defaults.items(): + if isinstance(value, Tensor): + this._defaults[key] = fn(value, *args, **kwargs) + elif isinstance(value, Sequence): + this._defaults[key] = [fn(v, *args, **kwargs) for v in value] + + current_val = getattr(this, key) + if isinstance(current_val, Tensor): + setattr(this, key, fn(current_val, *args, **kwargs)) + elif isinstance(current_val, Sequence): + setattr(this, key, [fn(cur_v, *args, **kwargs) for cur_v in current_val]) + else: + raise TypeError( + "Expected metric state to be either a Tensor" f"or a list of Tensor, but encountered {current_val}" + ) + + # make sure to update the device attribute + # if the dummy tensor moves device by fn function we should also update the attribute + self._device = fn(B.zeros(1, device=self.device), *args, **kwargs).device + + # Additional apply to forward cache and computed attributes (may be nested) + if this._computed is not None: + this._computed = apply_to_collection(this._computed, Tensor, fn) + if this._forward_cache is not None: + this._forward_cache = apply_to_collection(this._forward_cache, Tensor, fn) + + return this + + def persistent(self, mode: bool = 
False) -> None: + """Method for post-init to change if metric states should be saved to its state_dict.""" + for key in self._persistent: + self._persistent[key] = mode + + def state_dict( + self, + destination: Dict[str, Any] = None, + prefix: str = "", + keep_vars: bool = False, + ) -> Optional[Dict[str, Any]]: + destination = super().state_dict(destination=destination, prefix=prefix, keep_vars=keep_vars) + # Register metric states to be part of the state_dict + for key in self._defaults: + if not self._persistent[key]: + continue + current_val = getattr(self, key) + if not keep_vars: + if isinstance(current_val, Tensor): + current_val = current_val.detach() + elif isinstance(current_val, list): + current_val = [cur_v.detach() if isinstance(cur_v, Tensor) else cur_v for cur_v in current_val] + destination[prefix + key] = deepcopy(current_val) # type: ignore + return destination + + def _load_from_state_dict( + self, + state_dict: dict, + prefix: str, + local_metadata: dict, + strict: bool, + missing_keys: List[str], + unexpected_keys: List[str], + error_msgs: List[str], + ) -> None: + """Loads metric states from state_dict.""" + + for key in self._defaults: + name = prefix + key + if name in state_dict: + setattr(self, key, state_dict.pop(name)) + super()._load_from_state_dict( + state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs + ) + + def _filter_kwargs(self, **kwargs: Any) -> Dict[str, Any]: + """filter kwargs such that they match the update signature of the metric.""" + + # filter all parameters based on update signature except those of + # type VAR_POSITIONAL (*args) and VAR_KEYWORD (**kwargs) + _params = (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD) + _sign_params = self._update_signature.parameters + filtered_kwargs = { + k: v for k, v in kwargs.items() if (k in _sign_params.keys() and _sign_params[k].kind not in _params) + } + + # if no kwargs filtered, return al kwargs as default + if not filtered_kwargs: + filtered_kwargs = kwargs + return filtered_kwargs + + def __hash__(self) -> int: + # we need to add the id here, since PyTorch requires a module hash to be unique. + # Internally, PyTorch nn.Module relies on that for children discovery + # (see https://github.com/pytorch/pytorch/blob/v1.9.0/B.nn/modules/module.py#L1544) + # For metrics that include tensors it is not a problem, + # since their hash is unique based on the memory location but we cannot rely on that for every metric. 
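+        # id(self) is mixed into the hash so two separate instances never collide,
+        # even when their (possibly empty) states compare equal.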
+ hash_vals = [self.__class__.__name__, id(self)] + + for key in self._defaults: + val = getattr(self, key) + # Special case: allow list values, so long + # as their elements are hashable + if hasattr(val, "__iter__") and not isinstance(val, Tensor): + hash_vals.extend(val) + else: + hash_vals.append(val) + + return hash(tuple(hash_vals)) + + def __add__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.add, self, other) + + def __and__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.bitwise_and, self, other) + + # Fixme: this shall return bool instead of Metric + def __eq__(self, other: "Metric") -> "Metric": # type: ignore + return CompositionalMetric(B.eq, self, other) + + def __floordiv__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.floor_divide, self, other) + + def __ge__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.ge, self, other) + + def __gt__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.gt, self, other) + + def __le__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.le, self, other) + + def __lt__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.lt, self, other) + + def __matmul__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.matmul, self, other) + + def __mod__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.fmod, self, other) + + def __mul__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.mul, self, other) + + # Fixme: this shall return bool instead of Metric + def __ne__(self, other: "Metric") -> "Metric": # type: ignore + return CompositionalMetric(B.ne, self, other) + + def __or__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.bitwise_or, self, other) + + def __pow__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.pow, self, other) + + def __radd__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.add, other, self) + + def __rand__(self, other: "Metric") -> "Metric": + # swap them since bitwise_and only supports that way and it's commutative + return CompositionalMetric(B.bitwise_and, self, other) + + def __rfloordiv__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.floor_divide, other, self) + + def __rmatmul__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.matmul, other, self) + + def __rmod__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.fmod, other, self) + + def __rmul__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.mul, other, self) + + def __ror__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.bitwise_or, other, self) + + def __rpow__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.pow, other, self) + + def __rsub__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.sub, other, self) + + def __rtruediv__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.true_divide, other, self) + + def __rxor__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.bitwise_xor, other, self) + + def __sub__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.sub, self, other) + + def __truediv__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.true_divide, self, other) + + def __xor__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.bitwise_xor, self, other) + + def __abs__(self) -> "Metric": + return 
CompositionalMetric(B.abs, self, None) + + def __inv__(self) -> "Metric": + return CompositionalMetric(B.bitwise_not, self, None) + + def __invert__(self) -> "Metric": + return self.__inv__() + + def __neg__(self) -> "Metric": + return CompositionalMetric(_neg, self, None) + + def __pos__(self) -> "Metric": + return CompositionalMetric(B.abs, self, None) + + def __getitem__(self, idx: int) -> "Metric": + return CompositionalMetric(lambda x: x[idx], self, None) + + +def _neg(x: Tensor) -> Tensor: + return -B.abs(x) + + +class CompositionalMetric(Metric): + """Composition of two metrics with a specific operator which will be executed upon metrics compute.""" + + def __init__( + self, + operator: Callable, + metric_a: Union[Metric, int, float, Tensor], + metric_b: Union[Metric, int, float, Tensor, None], + ) -> None: + """ + Args: + operator: the operator taking in one (if metric_b is None) + or two arguments. Will be applied to outputs of metric_a.compute() + and (optionally if metric_b is not None) metric_b.compute() + metric_a: first metric whose compute() result is the first argument of operator + metric_b: second metric whose compute() result is the second argument of operator. + For operators taking in only one input, this should be None + """ + super().__init__() + + self.op = operator + + if isinstance(metric_a, Tensor): + self.register_buffer("metric_a", metric_a) + else: + self.metric_a = metric_a + + if isinstance(metric_b, Tensor): + self.register_buffer("metric_b", metric_b) + else: + self.metric_b = metric_b + + def _sync_dist(self, dist_sync_fn: Optional[Callable] = None, process_group: Optional[Any] = None) -> None: + # No syncing required here. syncing will be done in metric_a and metric_b + pass + + def update(self, *args: Any, **kwargs: Any) -> None: + if isinstance(self.metric_a, Metric): + self.metric_a.update(*args, **self.metric_a._filter_kwargs(**kwargs)) + + if isinstance(self.metric_b, Metric): + self.metric_b.update(*args, **self.metric_b._filter_kwargs(**kwargs)) + + def compute(self) -> Any: + + # also some parsing for kwargs? + if isinstance(self.metric_a, Metric): + val_a = self.metric_a.compute() + else: + val_a = self.metric_a + + if isinstance(self.metric_b, Metric): + val_b = self.metric_b.compute() + else: + val_b = self.metric_b + + if val_b is None: + return self.op(val_a) + + return self.op(val_a, val_b) + + def reset(self) -> None: + if isinstance(self.metric_a, Metric): + self.metric_a.reset() + + if isinstance(self.metric_b, Metric): + self.metric_b.reset() + + def persistent(self, mode: bool = False) -> None: + if isinstance(self.metric_a, Metric): + self.metric_a.persistent(mode=mode) + if isinstance(self.metric_b, Metric): + self.metric_b.persistent(mode=mode) + + def __repr__(self) -> str: + _op_metrics = f"(\n {self.op.__name__}(\n {repr(self.metric_a)},\n {repr(self.metric_b)}\n )\n)" + repr_str = self.__class__.__name__ + _op_metrics + + return repr_str diff --git a/RE/paddlemetric/src/paddlemetrics/py.typed b/RE/paddlemetric/src/paddlemetrics/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/paddlemetrics/regression/__init__.py b/RE/paddlemetric/src/paddlemetrics/regression/__init__.py new file mode 100644 index 00000000..aafc1024 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/__init__.py @@ -0,0 +1,26 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddlemetrics.regression.cosine_similarity import CosineSimilarity # noqa: F401 +from paddlemetrics.regression.explained_variance import ExplainedVariance # noqa: F401 +from paddlemetrics.regression.mean_absolute_error import MeanAbsoluteError # noqa: F401 +from paddlemetrics.regression.mean_absolute_percentage_error import MeanAbsolutePercentageError # noqa: F401 +from paddlemetrics.regression.mean_squared_error import MeanSquaredError # noqa: F401 +from paddlemetrics.regression.mean_squared_log_error import MeanSquaredLogError # noqa: F401 +from paddlemetrics.regression.pearson import PearsonCorrcoef # noqa: F401 +from paddlemetrics.regression.r2 import R2Score # noqa: F401 +from paddlemetrics.regression.spearman import SpearmanCorrcoef # noqa: F401 +from paddlemetrics.regression.symmetric_mean_absolute_percentage_error import ( # noqa: F401 + SymmetricMeanAbsolutePercentageError, +) +from paddlemetrics.regression.tweedie_deviance import TweedieDevianceScore # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/regression/cosine_similarity.py b/RE/paddlemetric/src/paddlemetrics/regression/cosine_similarity.py new file mode 100644 index 00000000..3b2946e2 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/cosine_similarity.py @@ -0,0 +1,105 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, List, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.regression.cosine_similarity import _cosine_similarity_compute, _cosine_similarity_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities.data import dim_zero_cat + + +class CosineSimilarity(Metric): + r""" + Computes the `Cosine Similarity`_ + between targets and predictions: + + .. math:: + cos_{sim}(x,y) = \frac{x \cdot y}{||x|| \cdot ||y||} = + \frac{\sum_{i=1}^n x_i y_i}{\sqrt{\sum_{i=1}^n x_i^2}\sqrt{\sum_{i=1}^n y_i^2}} + + where :math:`y` is a tensor of target values, and :math:`x` is a tensor of predictions. + + Forward accepts + + - ``preds`` (float tensor): ``(N,d)`` + - ``target`` (float tensor): ``(N,d)`` + + Args: + reduction: how to reduce over the batch dimension using 'sum', 'mean' or 'none' + (taking the individual scores) + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. 
+ default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the all gather. + + Example: + >>> from paddlemetrics import CosineSimilarity + >>> target = B.tensor([[0, 1], [1, 1]]) + >>> preds = B.tensor([[0, 1], [0, 1]]) + >>> cosine_similarity = CosineSimilarity(reduction = 'mean') + >>> cosine_similarity(preds, target) + tensor(0.8536) + + """ + is_differentiable = True + preds: List[Tensor] + target: List[Tensor] + + def __init__( + self, + reduction: str = "sum", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + allowed_reduction = ("sum", "mean", "none", None) + if reduction not in allowed_reduction: + raise ValueError(f"Expected argument `reduction` to be one of {allowed_reduction} but got {reduction}") + self.reduction = reduction + + self.add_state("preds", [], dist_reduce_fx="cat") + self.add_state("target", [], dist_reduce_fx="cat") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update metric states with predictions and targets. + + Args: + preds: Predicted tensor with shape ``(N,d)`` + target: Ground truth tensor with shape ``(N,d)`` + """ + preds, target = _cosine_similarity_update(preds, target) + + self.preds.append(preds) + self.target.append(target) + + def compute(self) -> Tensor: + preds = dim_zero_cat(self.preds) + target = dim_zero_cat(self.target) + return _cosine_similarity_compute(preds, target, self.reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/explained_variance.py b/RE/paddlemetric/src/paddlemetrics/regression/explained_variance.py new file mode 100644 index 00000000..226ac076 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/explained_variance.py @@ -0,0 +1,136 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional, Sequence, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.regression.explained_variance import ( + _explained_variance_compute, + _explained_variance_update, +) +from paddlemetrics.metric import Metric + + +class ExplainedVariance(Metric): + r""" + Computes `explained variance`_: + + .. math:: \text{ExplainedVariance} = 1 - \frac{\text{Var}(y - \hat{y})}{\text{Var}(y)} + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a + tensor of predictions. + + Forward accepts + + - ``preds`` (float tensor): ``(N,)`` or ``(N, ...)`` (multioutput) + - ``target`` (long tensor): ``(N,)`` or ``(N, ...)`` (multioutput) + + In the case of multioutput, as default the variances will be uniformly + averaged over the additional dimensions. 
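The `reduction` argument of `CosineSimilarity` above only controls how the per-row scores are aggregated. A plain-Python check of the docstring example (no paddle needed) makes the `'mean'` case concrete:

```python
import math

target = [[0.0, 1.0], [1.0, 1.0]]
preds  = [[0.0, 1.0], [0.0, 1.0]]

def cosine(x, y):
    dot = sum(a * b for a, b in zip(x, y))
    norm = math.sqrt(sum(a * a for a in x)) * math.sqrt(sum(b * b for b in y))
    return dot / norm

scores = [cosine(p, t) for p, t in zip(preds, target)]  # [1.0, 0.7071...]
print(sum(scores) / len(scores))                        # ~0.8536, matching the doctest
# reduction='sum' would return ~1.7071 and reduction='none' the per-row scores.
```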
Please see argument `multioutput` + for changing this behavior. + + Args: + multioutput: + Defines aggregation in the case of multiple output scores. Can be one + of the following strings (default is `'uniform_average'`.): + + * `'raw_values'` returns full set of scores + * `'uniform_average'` scores are uniformly averaged + * `'variance_weighted'` scores are weighted by their individual variances + + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Raises: + ValueError: + If ``multioutput`` is not one of ``"raw_values"``, ``"uniform_average"`` or ``"variance_weighted"``. + + Example: + >>> from paddlemetrics import ExplainedVariance + >>> target = B.tensor([3, -0.5, 2, 7]) + >>> preds = B.tensor([2.5, 0.0, 2, 8]) + >>> explained_variance = ExplainedVariance() + >>> explained_variance(preds, target) + tensor(0.9572) + + >>> target = B.tensor([[0.5, 1], [-1, 1], [7, -6]]) + >>> preds = B.tensor([[0, 2], [-1, 2], [8, -5]]) + >>> explained_variance = ExplainedVariance(multioutput='raw_values') + >>> explained_variance(preds, target) + tensor([0.9677, 1.0000]) + + """ + is_differentiable = True + n_obs: Tensor + sum_error: Tensor + sum_squared_error: Tensor + sum_target: Tensor + sum_squared_target: Tensor + + def __init__( + self, + multioutput: str = "uniform_average", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + allowed_multioutput = ("raw_values", "uniform_average", "variance_weighted") + if multioutput not in allowed_multioutput: + raise ValueError( + f"Invalid input to argument `multioutput`. Choose one of the following: {allowed_multioutput}" + ) + self.multioutput: str = multioutput + self.add_state("sum_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("sum_squared_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("sum_target", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("sum_squared_target", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("n_obs", default=tensor(0.0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. 
+ + Args: + preds: Predictions from model + target: Ground truth values + """ + n_obs, sum_error, sum_squared_error, sum_target, sum_squared_target = _explained_variance_update(preds, target) + self.n_obs = self.n_obs + n_obs + self.sum_error = self.sum_error + sum_error + self.sum_squared_error = self.sum_squared_error + sum_squared_error + self.sum_target = self.sum_target + sum_target + self.sum_squared_target = self.sum_squared_target + sum_squared_target + + def compute(self) -> Union[Tensor, Sequence[Tensor]]: + """Computes explained variance over state.""" + return _explained_variance_compute( + self.n_obs, + self.sum_error, + self.sum_squared_error, + self.sum_target, + self.sum_squared_target, + self.multioutput, + ) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/mean_absolute_error.py b/RE/paddlemetric/src/paddlemetrics/regression/mean_absolute_error.py new file mode 100644 index 00000000..8614bed2 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/mean_absolute_error.py @@ -0,0 +1,86 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.regression.mean_absolute_error import ( + _mean_absolute_error_compute, + _mean_absolute_error_update, +) +from paddlemetrics.metric import Metric + + +class MeanAbsoluteError(Metric): + r""" + `Computes Mean Absolute Error`_ (MAE): + + .. math:: \text{MAE} = \frac{1}{N}\sum_i^N | y_i - \hat{y_i} | + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a tensor of predictions. + + Args: + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Example: + >>> from paddlemetrics import MeanAbsoluteError + >>> target = B.tensor([3.0, -0.5, 2.0, 7.0]) + >>> preds = B.tensor([2.5, 0.0, 2.0, 8.0]) + >>> mean_absolute_error = MeanAbsoluteError() + >>> mean_absolute_error(preds, target) + tensor(0.5000) + """ + is_differentiable = True + sum_abs_error: Tensor + total: Tensor + + def __init__( + self, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.add_state("sum_abs_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. 
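`ExplainedVariance` never stores predictions; it keeps a handful of running sums and reconstructs the two variances at `compute()` time via Var(x) = E[x^2] - E[x]^2. A plain-Python sketch of that identity on the docstring values (the actual reduction lives in `_explained_variance_compute`; the state names below mirror the ones registered in `__init__`):

```python
target = [3.0, -0.5, 2.0, 7.0]
preds  = [2.5,  0.0, 2.0, 8.0]

n = len(target)
sum_error          = sum(t - p for t, p in zip(target, preds))
sum_squared_error  = sum((t - p) ** 2 for t, p in zip(target, preds))
sum_target         = sum(target)
sum_squared_target = sum(t ** 2 for t in target)

# Population variances recovered from the running sums.
var_diff   = sum_squared_error / n - (sum_error / n) ** 2
var_target = sum_squared_target / n - (sum_target / n) ** 2
print(1 - var_diff / var_target)   # ~0.9572, matching the doctest above
```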
+ + Args: + preds: Predictions from model + target: Ground truth values + """ + sum_abs_error, n_obs = _mean_absolute_error_update(preds, target) + + self.sum_abs_error += sum_abs_error + self.total += n_obs + + def compute(self) -> Tensor: + """Computes mean absolute error over state.""" + return _mean_absolute_error_compute(self.sum_abs_error, self.total) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/mean_absolute_percentage_error.py b/RE/paddlemetric/src/paddlemetrics/regression/mean_absolute_percentage_error.py new file mode 100644 index 00000000..66d9c091 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/mean_absolute_percentage_error.py @@ -0,0 +1,95 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.regression.mean_absolute_percentage_error import ( + _mean_absolute_percentage_error_compute, + _mean_absolute_percentage_error_update, +) +from paddlemetrics.metric import Metric + + +class MeanAbsolutePercentageError(Metric): + r""" + Computes `Mean Absolute Percentage Error`_ (MAPE): + + .. math:: \text{MAPE} = \frac{1}{n}\sum_1^n\frac{| y_i - \hat{y_i} |}{\max(\epsilon, y_i)} + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a tensor of predictions. + + Args: + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Note: + The epsilon value is taken from `scikit-learn's implementation of MAPE`_. + + Note: + MAPE output is a non-negative floating point. Best result is 0.0 . But it is important to note that, + bad predictions, can lead to arbitarily large values. Especially when some ``target`` values are close to 0. + This `MAPE implementation returns`_ a very large number instead of ``inf``. 
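Both notes are easy to see numerically. A plain-Python sketch of the clamped formula; the exact epsilon lives in the functional helper, so the float64 machine epsilon used by scikit-learn is assumed here:

```python
eps = 2.220446049250313e-16   # assumed: float64 machine epsilon, as in scikit-learn

def mape(preds, target):
    return sum(abs(t - p) / max(abs(t), eps) for p, t in zip(preds, target)) / len(target)

print(mape([0.9, 15.0], [1.0, 10.0]))   # 0.3 -- well-behaved targets, ordinary score
print(mape([0.1], [0.0]))               # ~4.5e14 -- a zero target dominates instead of giving inf
```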
+ + Example: + >>> from paddlemetrics import MeanAbsolutePercentageError + >>> target = B.tensor([1, 10, 1e6]) + >>> preds = B.tensor([0.9, 15, 1.2e6]) + >>> mean_abs_percentage_error = MeanAbsolutePercentageError() + >>> mean_abs_percentage_error(preds, target) + tensor(0.2667) + + """ + is_differentiable = True + sum_abs_per_error: Tensor + total: Tensor + + def __init__( + self, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.add_state("sum_abs_per_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0.0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + sum_abs_per_error, num_obs = _mean_absolute_percentage_error_update(preds, target) + + self.sum_abs_per_error += sum_abs_per_error + self.total += num_obs + + def compute(self) -> Tensor: + """Computes mean absolute percentage error over state.""" + return _mean_absolute_percentage_error_compute(self.sum_abs_per_error, self.total) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/mean_squared_error.py b/RE/paddlemetric/src/paddlemetrics/regression/mean_squared_error.py new file mode 100644 index 00000000..8c1c9245 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/mean_squared_error.py @@ -0,0 +1,91 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.regression.mean_squared_error import ( + _mean_squared_error_compute, + _mean_squared_error_update, +) +from paddlemetrics.metric import Metric + + +class MeanSquaredError(Metric): + r""" + Computes `mean squared error`_ (MSE): + + .. math:: \text{MSE} = \frac{1}{N}\sum_i^N(y_i - \hat{y_i})^2 + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a tensor of predictions. + + Args: + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + squared: + If True returns MSE value, if False returns RMSE value. 
+ + Example: + >>> from paddlemetrics import MeanSquaredError + >>> target = B.tensor([2.5, 5.0, 4.0, 8.0]) + >>> preds = B.tensor([3.0, 5.0, 2.5, 7.0]) + >>> mean_squared_error = MeanSquaredError() + >>> mean_squared_error(preds, target) + tensor(0.8750) + + """ + is_differentiable = True + sum_squared_error: Tensor + total: Tensor + + def __init__( + self, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + squared: bool = True, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.add_state("sum_squared_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + self.squared = squared + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + sum_squared_error, n_obs = _mean_squared_error_update(preds, target) + + self.sum_squared_error += sum_squared_error + self.total += n_obs + + def compute(self) -> Tensor: + """Computes mean squared error over state.""" + return _mean_squared_error_compute(self.sum_squared_error, self.total, squared=self.squared) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/mean_squared_log_error.py b/RE/paddlemetric/src/paddlemetrics/regression/mean_squared_log_error.py new file mode 100644 index 00000000..e36773b0 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/mean_squared_log_error.py @@ -0,0 +1,90 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.regression.mean_squared_log_error import ( + _mean_squared_log_error_compute, + _mean_squared_log_error_update, +) +from paddlemetrics.metric import Metric + + +class MeanSquaredLogError(Metric): + r""" + Computes `mean squared logarithmic error`_ (MSLE): + + .. math:: \text{MSLE} = \frac{1}{N}\sum_i^N (\log_e(1 + y_i) - \log_e(1 + \hat{y_i}))^2 + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a tensor of predictions. + + Args: + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Example: + >>> from paddlemetrics import MeanSquaredLogError + >>> target = B.tensor([2.5, 5, 4, 8]) + >>> preds = B.tensor([3, 5, 2.5, 7]) + >>> mean_squared_log_error = MeanSquaredLogError() + >>> mean_squared_log_error(preds, target) + tensor(0.0397) + + .. 
note:: + Half precision is only support on GPU for this metric + + """ + is_differentiable = True + sum_squared_log_error: Tensor + total: Tensor + + def __init__( + self, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.add_state("sum_squared_log_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + sum_squared_log_error, n_obs = _mean_squared_log_error_update(preds, target) + + self.sum_squared_log_error += sum_squared_log_error + self.total += n_obs + + def compute(self) -> Tensor: + """Compute mean squared logarithmic error over state.""" + return _mean_squared_log_error_compute(self.sum_squared_log_error, self.total) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/pearson.py b/RE/paddlemetric/src/paddlemetrics/regression/pearson.py new file mode 100644 index 00000000..7927392a --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/pearson.py @@ -0,0 +1,140 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, List, Optional, Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.regression.pearson import _pearson_corrcoef_compute, _pearson_corrcoef_update +from paddlemetrics.metric import Metric + + +def _final_aggregation( + means_x: Tensor, + means_y: Tensor, + vars_x: Tensor, + vars_y: Tensor, + corrs_xy: Tensor, + nbs: Tensor, +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + """Aggregate the statistics from multiple devices. 
+ + Formula taken from here: `Aggregate the statistics from multiple devices`_ + """ + # assert len(means_x) > 1 and len(means_y) > 1 and len(vars_x) > 1 and len(vars_y) > 1 and len(corrs_xy) > 1 + mx1, my1, vx1, vy1, cxy1, n1 = means_x[0], means_y[0], vars_x[0], vars_y[0], corrs_xy[0], nbs[0] + for i in range(1, len(means_x)): + mx2, my2, vx2, vy2, cxy2, n2 = means_x[i], means_y[i], vars_x[i], vars_y[i], corrs_xy[i], nbs[i] + + nb = n1 + n2 + mean_x = (n1 * mx1 + n2 * mx2) / nb + mean_y = (n1 * my1 + n2 * my2) / nb + var_x = 1 / (n1 + n2 - 1) * ((n1 - 1) * vx1 + (n2 - 1) * vx2 + ((n1 * n2) / (n1 + n2)) * (mx1 - mx2) ** 2) + var_y = 1 / (n1 + n2 - 1) * ((n1 - 1) * vy1 + (n2 - 1) * vy2 + ((n1 * n2) / (n1 + n2)) * (my1 - my2) ** 2) + + corr1 = n1 * cxy1 + n1 * (mx1 - mean_x) * (my1 - mean_y) + corr2 = n2 * cxy2 + n2 * (mx2 - mean_x) * (my2 - mean_y) + corr_xy = (corr1 + corr2) / (n1 + n2) + + mx1, my1, vx1, vy1, cxy1, n1 = mean_x, mean_y, var_x, var_y, corr_xy, nb + + return var_x, var_y, corr_xy, nb + + +class PearsonCorrcoef(Metric): + r""" + Computes `Pearson Correlation Coefficient`_: + + .. math:: + P_{corr}(x,y) = \frac{cov(x,y)}{\sigma_x \sigma_y} + + Where :math:`y` is a tensor of target values, and :math:`x` is a + tensor of predictions. + + Forward accepts + + - ``preds`` (float tensor): ``(N,)`` + - ``target``(float tensor): ``(N,)`` + + Args: + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Example: + >>> from paddlemetrics import PearsonCorrcoef + >>> target = B.tensor([3, -0.5, 2, 7]) + >>> preds = B.tensor([2.5, 0.0, 2, 8]) + >>> pearson = PearsonCorrcoef() + >>> pearson(preds, target) + tensor(0.9849) + + """ + is_differentiable = True + preds: List[Tensor] + target: List[Tensor] + mean_x: Tensor + mean_y: Tensor + var_x: Tensor + var_y: Tensor + corr_xy: Tensor + n_total: Tensor + + def __init__( + self, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + + self.add_state("mean_x", default=B.zeros(1), dist_reduce_fx=None) + self.add_state("mean_y", default=B.zeros(1), dist_reduce_fx=None) + self.add_state("var_x", default=B.zeros(1), dist_reduce_fx=None) + self.add_state("var_y", default=B.zeros(1), dist_reduce_fx=None) + self.add_state("corr_xy", default=B.zeros(1), dist_reduce_fx=None) + self.add_state("n_total", default=B.zeros(1), dist_reduce_fx=None) + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. 
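`_final_aggregation` merges per-device running statistics with the usual pooled mean/variance/covariance identities. Below is a plain-Python check that merging two chunks reproduces the statistics of the concatenated data; it assumes, as the formulas above suggest, that the per-chunk variances are the unbiased (n - 1) estimates and that `corr_xy` is the mean-centred cross product divided by n:

```python
x1, y1 = [1.0, 2.0, 3.0], [2.0, 4.0, 5.0]
x2, y2 = [4.0, 5.0, 6.0], [9.0, 8.0, 12.0]

def chunk_stats(xs, ys):
    n = len(xs)
    mx, my = sum(xs) / n, sum(ys) / n
    vx = sum((x - mx) ** 2 for x in xs) / (n - 1)
    vy = sum((y - my) ** 2 for y in ys) / (n - 1)
    cxy = sum((x - mx) * (y - my) for x, y in zip(xs, ys)) / n
    return mx, my, vx, vy, cxy, n

mx1, my1, vx1, vy1, cxy1, n1 = chunk_stats(x1, y1)
mx2, my2, vx2, vy2, cxy2, n2 = chunk_stats(x2, y2)

# Same pooling formulas as in _final_aggregation above.
nb = n1 + n2
mean_x = (n1 * mx1 + n2 * mx2) / nb
mean_y = (n1 * my1 + n2 * my2) / nb
var_x = ((n1 - 1) * vx1 + (n2 - 1) * vx2 + (n1 * n2 / nb) * (mx1 - mx2) ** 2) / (nb - 1)
corr_xy = (n1 * cxy1 + n1 * (mx1 - mean_x) * (my1 - mean_y)
           + n2 * cxy2 + n2 * (mx2 - mean_x) * (my2 - mean_y)) / nb

# Reference values from the concatenated data.
_, _, vxa, _, cxya, _ = chunk_stats(x1 + x2, y1 + y2)
print(abs(var_x - vxa) < 1e-12, abs(corr_xy - cxya) < 1e-12)   # True True
```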
+ + Args: + preds: Predictions from model + target: Ground truth values + """ + self.mean_x, self.mean_y, self.var_x, self.var_y, self.corr_xy, self.n_total = _pearson_corrcoef_update( + preds, target, self.mean_x, self.mean_y, self.var_x, self.var_y, self.corr_xy, self.n_total + ) + + def compute(self) -> Tensor: + """Computes pearson correlation coefficient over state.""" + if self.mean_x.numel() > 1: # multiple devices, need further reduction + var_x, var_y, corr_xy, n_total = _final_aggregation( + self.mean_x, self.mean_y, self.var_x, self.var_y, self.corr_xy, self.n_total + ) + else: + var_x = self.var_x + var_y = self.var_y + corr_xy = self.corr_xy + n_total = self.n_total + + return _pearson_corrcoef_compute(var_x, var_y, corr_xy, n_total) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/r2.py b/RE/paddlemetric/src/paddlemetrics/regression/r2.py new file mode 100644 index 00000000..36db3d8d --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/r2.py @@ -0,0 +1,149 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.regression.r2 import _r2_score_compute, _r2_score_update +from paddlemetrics.metric import Metric + + +class R2Score(Metric): + r""" + Computes r2 score also known as `R2 Score_Coefficient Determination`_: + + .. math:: R^2 = 1 - \frac{SS_{res}}{SS_{tot}} + + where :math:`SS_{res}=\sum_i (y_i - f(x_i))^2` is the sum of residual squares, and + :math:`SS_{tot}=\sum_i (y_i - \bar{y})^2` is total sum of squares. Can also calculate + adjusted r2 score given by + + .. math:: R^2_{adj} = 1 - \frac{(1-R^2)(n-1)}{n-k-1} + + where the parameter :math:`k` (the number of independent regressors) should + be provided as the `adjusted` argument. + + Forward accepts + + - ``preds`` (float tensor): ``(N,)`` or ``(N, M)`` (multioutput) + - ``target`` (float tensor): ``(N,)`` or ``(N, M)`` (multioutput) + + In the case of multioutput, as default the variances will be uniformly + averaged over the additional dimensions. Please see argument `multioutput` + for changing this behavior. + + Args: + num_outputs: + Number of outputs in multioutput setting (default is 1) + adjusted: + number of independent regressors for calculating adjusted r2 score. + Default 0 (standard r2 score). + multioutput: + Defines aggregation in the case of multiple output scores. Can be one + of the following strings (default is ``'uniform_average'``.): + + * ``'raw_values'`` returns full set of scores + * ``'uniform_average'`` scores are uniformly averaged + * ``'variance_weighted'`` scores are weighted by their individual variances + + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. 
default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Raises: + ValueError: + If ``adjusted`` parameter is not an integer larger or equal to 0. + ValueError: + If ``multioutput`` is not one of ``"raw_values"``, ``"uniform_average"`` or ``"variance_weighted"``. + + Example: + >>> from paddlemetrics import R2Score + >>> target = B.tensor([3, -0.5, 2, 7]) + >>> preds = B.tensor([2.5, 0.0, 2, 8]) + >>> r2score = R2Score() + >>> r2score(preds, target) + tensor(0.9486) + + >>> target = B.tensor([[0.5, 1], [-1, 1], [7, -6]]) + >>> preds = B.tensor([[0, 2], [-1, 2], [8, -5]]) + >>> r2score = R2Score(num_outputs=2, multioutput='raw_values') + >>> r2score(preds, target) + tensor([0.9654, 0.9082]) + + """ + is_differentiable = True + sum_squared_error: Tensor + sum_error: Tensor + residual: Tensor + total: Tensor + + def __init__( + self, + num_outputs: int = 1, + adjusted: int = 0, + multioutput: str = "uniform_average", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.num_outputs = num_outputs + + if adjusted < 0 or not isinstance(adjusted, int): + raise ValueError("`adjusted` parameter should be an integer larger or equal to 0.") + self.adjusted = adjusted + + allowed_multioutput = ("raw_values", "uniform_average", "variance_weighted") + if multioutput not in allowed_multioutput: + raise ValueError( + f"Invalid input to argument `multioutput`. Choose one of the following: {allowed_multioutput}" + ) + self.multioutput = multioutput + + self.add_state("sum_squared_error", default=B.zeros(self.num_outputs), dist_reduce_fx="sum") + self.add_state("sum_error", default=B.zeros(self.num_outputs), dist_reduce_fx="sum") + self.add_state("residual", default=B.zeros(self.num_outputs), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + sum_squared_error, sum_error, residual, total = _r2_score_update(preds, target) + + self.sum_squared_error += sum_squared_error + self.sum_error += sum_error + self.residual += residual + self.total += total + + def compute(self) -> Tensor: + """Computes r2 score over the metric states.""" + return _r2_score_compute( + self.sum_squared_error, self.sum_error, self.residual, self.total, self.adjusted, self.multioutput + ) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/spearman.py b/RE/paddlemetric/src/paddlemetrics/regression/spearman.py new file mode 100644 index 00000000..76249378 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/spearman.py @@ -0,0 +1,96 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
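The `adjusted` argument is a post-hoc correction of the plain score. Reproducing the first doctest by hand and then applying the adjustment formula for a single regressor (k = 1):

```python
target = [3.0, -0.5, 2.0, 7.0]
preds  = [2.5,  0.0, 2.0, 8.0]

ss_res = sum((t - p) ** 2 for t, p in zip(target, preds))
mean_t = sum(target) / len(target)
ss_tot = sum((t - mean_t) ** 2 for t in target)
r2 = 1 - ss_res / ss_tot                       # ~0.9486, matching the doctest

n, k = len(target), 1
print(1 - (1 - r2) * (n - 1) / (n - k - 1))    # ~0.9229, i.e. what R2Score(adjusted=1) would report
```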
+# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, List, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.regression.spearman import _spearman_corrcoef_compute, _spearman_corrcoef_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn +from paddlemetrics.utilities.data import dim_zero_cat + + +class SpearmanCorrcoef(Metric): + r""" + Computes `spearmans rank correlation coefficient`_. + + .. math: + r_s = = \frac{cov(rg_x, rg_y)}{\sigma_{rg_x} * \sigma_{rg_y}} + + where rg_x and rg_y are the rank associated to the variables x and y. Spearmans correlations coefficient + corresponds to the standard pearsons correlation coefficient calculated on the rank variables. + + Args: + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather + + Example: + >>> from paddlemetrics import SpearmanCorrcoef + >>> target = B.tensor([3, -0.5, 2, 7]) + >>> preds = B.tensor([2.5, 0.0, 2, 8]) + >>> spearman = SpearmanCorrcoef() + >>> spearman(preds, target) + tensor(1.0000) + + """ + is_differentiable = False + preds: List[Tensor] + target: List[Tensor] + + def __init__( + self, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + rank_zero_warn( + "Metric `SpearmanCorrcoef` will save all targets and predictions in the buffer." + " For large datasets, this may lead to large memory footprint." + ) + + self.add_state("preds", default=[], dist_reduce_fx="cat") + self.add_state("target", default=[], dist_reduce_fx="cat") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + preds, target = _spearman_corrcoef_update(preds, target) + self.preds.append(preds) + self.target.append(target) + + def compute(self) -> Tensor: + """Computes spearmans correlation coefficient.""" + preds = dim_zero_cat(self.preds) + target = dim_zero_cat(self.target) + return _spearman_corrcoef_compute(preds, target) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/symmetric_mean_absolute_percentage_error.py b/RE/paddlemetric/src/paddlemetrics/regression/symmetric_mean_absolute_percentage_error.py new file mode 100644 index 00000000..3e545e08 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/symmetric_mean_absolute_percentage_error.py @@ -0,0 +1,92 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
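Spearman's coefficient is simply Pearson's correlation computed on ranks. A plain-Python sketch for the docstring values (ties, which the functional helper resolves with average ranks, are ignored here):

```python
target = [3.0, -0.5, 2.0, 7.0]
preds  = [2.5,  0.0, 2.0, 8.0]

def ranks(xs):
    order = sorted(range(len(xs)), key=lambda i: xs[i])
    out = [0.0] * len(xs)
    for rank, i in enumerate(order, start=1):
        out[i] = float(rank)
    return out

def pearson(xs, ys):
    n = len(xs)
    mx, my = sum(xs) / n, sum(ys) / n
    cov = sum((x - mx) * (y - my) for x, y in zip(xs, ys))
    sx = sum((x - mx) ** 2 for x in xs) ** 0.5
    sy = sum((y - my) ** 2 for y in ys) ** 0.5
    return cov / (sx * sy)

print(ranks(preds), ranks(target))            # both [3.0, 1.0, 2.0, 4.0]
print(pearson(ranks(preds), ranks(target)))   # 1.0, matching the doctest
```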
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.regression.symmetric_mean_absolute_percentage_error import ( + _symmetric_mean_absolute_percentage_error_compute, + _symmetric_mean_absolute_percentage_error_update, +) +from paddlemetrics.metric import Metric + + +class SymmetricMeanAbsolutePercentageError(Metric): + r""" + Computes symmetric mean absolute percentage error (`SMAPE`_). + + .. math:: \text{SMAPE} = \frac{2}{n}\sum_1^n max(\frac{| y_i - \hat{y_i} |}{| y_i | + | \hat{y_i} |, \epsilon}) + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a tensor of predictions. + + Args: + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Note: + The epsilon value is taken from `scikit-learn's implementation of SMAPE`_. + + Note: + SMAPE output is a non-negative floating point between 0 and 1. Best result is 0.0 . + + + Example: + >>> from paddlemetrics import SymmetricMeanAbsolutePercentageError + >>> target = B.tensor([1, 10, 1e6]) + >>> preds = B.tensor([0.9, 15, 1.2e6]) + >>> smape = SymmetricMeanAbsolutePercentageError() + >>> smape(preds, target) + tensor(0.2290) + """ + is_differentiable = True + sum_abs_per_error: Tensor + total: Tensor + + def __init__( + self, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.add_state("sum_abs_per_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0.0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + sum_abs_per_error, num_obs = _symmetric_mean_absolute_percentage_error_update(preds, target) + + self.sum_abs_per_error += sum_abs_per_error + self.total += num_obs + + def compute(self) -> Tensor: + """Computes mean absolute percentage error over state.""" + return _symmetric_mean_absolute_percentage_error_compute(self.sum_abs_per_error, self.total) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/tweedie_deviance.py b/RE/paddlemetric/src/paddlemetrics/regression/tweedie_deviance.py new file mode 100644 index 00000000..4687bdd5 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/tweedie_deviance.py @@ -0,0 +1,116 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
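The doctest above can be reproduced by hand; note that it is the denominator that gets clamped, i.e. SMAPE = (2 / n) * sum(|y - yhat| / max(|y| + |yhat|, eps)). The epsilon below is an assumed small constant; the exact value lives in the functional helper:

```python
eps = 1.17e-06   # assumed placeholder; see _symmetric_mean_absolute_percentage_error_update

target = [1.0, 10.0, 1e6]
preds  = [0.9, 15.0, 1.2e6]

terms = [abs(t - p) / max(abs(t) + abs(p), eps) for t, p in zip(target, preds)]
print(2 * sum(terms) / len(terms))   # ~0.2290, matching the doctest
```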
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.regression.tweedie_deviance import ( + _tweedie_deviance_score_compute, + _tweedie_deviance_score_update, +) +from paddlemetrics.metric import Metric + + +class TweedieDevianceScore(Metric): + r""" + Computes the `Tweedie Deviance Score`_ between targets and predictions: + + .. math:: + deviance\_score(\hat{y},y) = + \begin{cases} + (\hat{y} - y)^2, & \text{for }power=0\\ + 2 * (y * log(\frac{y}{\hat{y}}) + \hat{y} - y), & \text{for }power=1\\ + 2 * (log(\frac{\hat{y}}{y}) + \frac{y}{\hat{y}} - 1), & \text{for }power=2\\ + 2 * (\frac{(max(y,0))^{2}}{(1 - power)(2 - power)} - \frac{y(\hat{y})^{1 - power}}{1 - power} + \frac{(\hat{y}) + ^{2 - power}}{2 - power}), & \text{otherwise} + \end{cases} + + where :math:`y` is a tensor of targets values, and :math:`\hat{y}` is a tensor of predictions. + + Forward accepts + + - ``preds`` (float tensor): ``(N,...)`` + - ``targets`` (float tensor): ``(N,...)`` + + Args: + power: + - power < 0 : Extreme stable distribution. (Requires: preds > 0.) + - power = 0 : Normal distribution. (Requires: targets and preds can be any real numbers.) + - power = 1 : Poisson distribution. (Requires: targets >= 0 and y_pred > 0.) + - 1 < p < 2 : Compound Poisson distribution. (Requires: targets >= 0 and preds > 0.) + - power = 2 : Gamma distribution. (Requires: targets > 0 and preds > 0.) + - power = 3 : Inverse Gaussian distribution. (Requires: targets > 0 and preds > 0.) + - otherwise : Positive stable distribution. (Requires: targets > 0 and preds > 0.) + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the all gather. 
+ + Example: + >>> from paddlemetrics import TweedieDevianceScore + >>> targets = B.tensor([1.0, 2.0, 3.0, 4.0]) + >>> preds = B.tensor([4.0, 3.0, 2.0, 1.0]) + >>> deviance_score = TweedieDevianceScore(power=2) + >>> deviance_score(preds, targets) + tensor(1.2083) + + """ + is_differentiable = True + sum_deviance_score: Tensor + num_observations: Tensor + + def __init__( + self, + power: float = 0.0, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + if 0 < power < 1: + raise ValueError(f"Deviance Score is not defined for power={power}.") + + self.power: float = power + + self.add_state("sum_deviance_score", B.tensor(0.0), dist_reduce_fx="sum") + self.add_state("num_observations", B.tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, targets: Tensor) -> None: # type: ignore + """Update metric states with predictions and targets. + + Args: + preds: Predicted tensor with shape ``(N,d)`` + targets: Ground truth tensor with shape ``(N,d)`` + """ + sum_deviance_score, num_observations = _tweedie_deviance_score_update(preds, targets, self.power) + + self.sum_deviance_score += sum_deviance_score + self.num_observations += num_observations + + def compute(self) -> Tensor: + return _tweedie_deviance_score_compute(self.sum_deviance_score, self.num_observations) diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/__init__.py b/RE/paddlemetric/src/paddlemetrics/retrieval/__init__.py new file mode 100644 index 00000000..208a0224 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/__init__.py @@ -0,0 +1,22 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddlemetrics.retrieval.mean_average_precision import RetrievalMAP # noqa: F401 +from paddlemetrics.retrieval.mean_reciprocal_rank import RetrievalMRR # noqa: F401 +from paddlemetrics.retrieval.retrieval_fallout import RetrievalFallOut # noqa: F401 +from paddlemetrics.retrieval.retrieval_hit_rate import RetrievalHitRate # noqa: F401 +from paddlemetrics.retrieval.retrieval_metric import RetrievalMetric # noqa: F401 +from paddlemetrics.retrieval.retrieval_ndcg import RetrievalNormalizedDCG # noqa: F401 +from paddlemetrics.retrieval.retrieval_precision import RetrievalPrecision # noqa: F401 +from paddlemetrics.retrieval.retrieval_r_precision import RetrievalRPrecision # noqa: F401 +from paddlemetrics.retrieval.retrieval_recall import RetrievalRecall # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/mean_average_precision.py b/RE/paddlemetric/src/paddlemetrics/retrieval/mean_average_precision.py new file mode 100644 index 00000000..ee7f9065 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/mean_average_precision.py @@ -0,0 +1,70 @@ +# Copyright The PyTorch Lightning team. 
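Hand-checking the `power=2` (Gamma) branch of the formula above against the doctest; the metric reports the accumulated deviance divided by the number of observations:

```python
import math

targets = [1.0, 2.0, 3.0, 4.0]
preds   = [4.0, 3.0, 2.0, 1.0]

# Per-sample Gamma deviance: 2 * (log(p / t) + t / p - 1)
dev = [2 * (math.log(p / t) + t / p - 1) for t, p in zip(targets, preds)]
print(sum(dev) / len(dev))   # ~1.2083, matching the doctest
```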
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.retrieval.average_precision import retrieval_average_precision +from paddlemetrics.retrieval.retrieval_metric import RetrievalMetric + + +class RetrievalMAP(RetrievalMetric): + """Computes `Mean Average Precision`_. + + Works with binary target data. Accepts float predictions from a model output. + + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` + - ``target`` (long or bool tensor): ``(N, ...)`` + - ``indexes`` (long tensor): ``(N, ...)`` + + ``indexes``, ``preds`` and ``target`` must have the same dimension. + ``indexes`` indicate to which query a prediction belongs. + Predictions will be first grouped by ``indexes`` and then `MAP` will be computed as the mean + of the `Average Precisions` over each query. + + Args: + empty_target_action: + Specify what to do with queries that do not have at least a positive ``target``. Choose from: + + - ``'neg'``: those queries count as ``0.0`` (default) + - ``'pos'``: those queries count as ``1.0`` + - ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned + - ``'error'``: raise a ``ValueError`` + + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects + the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. default: None + + Example: + >>> from paddlemetrics import RetrievalMAP + >>> indexes = tensor([0, 0, 0, 1, 1, 1, 1]) + >>> preds = tensor([0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]) + >>> target = tensor([False, False, True, False, True, False, True]) + >>> rmap = RetrievalMAP() + >>> rmap(preds, target, indexes=indexes) + tensor(0.7917) + """ + + higher_is_better = True + + def _metric(self, preds: Tensor, target: Tensor) -> Tensor: + return retrieval_average_precision(preds, target) diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/mean_reciprocal_rank.py b/RE/paddlemetric/src/paddlemetrics/retrieval/mean_reciprocal_rank.py new file mode 100644 index 00000000..76f15bde --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/mean_reciprocal_rank.py @@ -0,0 +1,70 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
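How the doctest above decomposes: predictions are grouped per query via `indexes`, an Average Precision is computed inside each group, and the group scores are averaged. A plain-Python sketch:

```python
indexes = [0, 0, 0, 1, 1, 1, 1]
preds   = [0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]
target  = [False, False, True, False, True, False, True]

def average_precision(group):
    # Rank by score, then average the precision at each relevant position.
    ranked = sorted(group, key=lambda pt: pt[0], reverse=True)
    hits, precisions = 0, []
    for rank, (_, relevant) in enumerate(ranked, start=1):
        if relevant:
            hits += 1
            precisions.append(hits / rank)
    return sum(precisions) / len(precisions)

aps = []
for q in sorted(set(indexes)):
    group = [(p, t) for i, p, t in zip(indexes, preds, target) if i == q]
    aps.append(average_precision(group))

print(aps, sum(aps) / len(aps))   # [1.0, 0.5833...] -> ~0.7917, matching the doctest
```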
+# See the License for the specific language governing permissions and +# limitations under the License. +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.retrieval.reciprocal_rank import retrieval_reciprocal_rank +from paddlemetrics.retrieval.retrieval_metric import RetrievalMetric + + +class RetrievalMRR(RetrievalMetric): + """Computes `Mean Reciprocal Rank`_. + + Works with binary target data. Accepts float predictions from a model output. + + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` + - ``target`` (long or bool tensor): ``(N, ...)`` + - ``indexes`` (long tensor): ``(N, ...)`` + + ``indexes``, ``preds`` and ``target`` must have the same dimension. + ``indexes`` indicate to which query a prediction belongs. + Predictions will be first grouped by ``indexes`` and then `MRR` will be computed as the mean + of the `Reciprocal Rank` over each query. + + Args: + empty_target_action: + Specify what to do with queries that do not have at least a positive ``target``. Choose from: + + - ``'neg'``: those queries count as ``0.0`` (default) + - ``'pos'``: those queries count as ``1.0`` + - ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned + - ``'error'``: raise a ``ValueError`` + + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects + the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. default: None + + Example: + >>> from paddlemetrics import RetrievalMRR + >>> indexes = tensor([0, 0, 0, 1, 1, 1, 1]) + >>> preds = tensor([0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]) + >>> target = tensor([False, False, True, False, True, False, True]) + >>> mrr = RetrievalMRR() + >>> mrr(preds, target, indexes=indexes) + tensor(0.7500) + """ + + higher_is_better = True + + def _metric(self, preds: Tensor, target: Tensor) -> Tensor: + return retrieval_reciprocal_rank(preds, target) diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_fallout.py b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_fallout.py new file mode 100644 index 00000000..38b70f7c --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_fallout.py @@ -0,0 +1,131 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.retrieval.fall_out import retrieval_fall_out +from paddlemetrics.retrieval.retrieval_metric import RetrievalMetric +from paddlemetrics.utilities.data import get_group_indexes + + +class RetrievalFallOut(RetrievalMetric): + """Computes `Fall-out`_. + + Works with binary target data. Accepts float predictions from a model output. + + Forward accepts: + + - ``preds`` (float tensor): ``(N, ...)`` + - ``target`` (long or bool tensor): ``(N, ...)`` + - ``indexes`` (long tensor): ``(N, ...)`` + + ``indexes``, ``preds`` and ``target`` must have the same dimension. + ``indexes`` indicate to which query a prediction belongs. + Predictions will be first grouped by ``indexes`` and then `Fall-out` will be computed as the mean + of the `Fall-out` over each query. + + Args: + empty_target_action: + Specify what to do with queries that do not have at least a negative ``target``. Choose from: + + - ``'neg'``: those queries count as ``0.0`` (default) + - ``'pos'``: those queries count as ``1.0`` + - ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned + - ``'error'``: raise a ``ValueError`` + + k: consider only the top k elements for each query (default: None, which considers them all) + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects + the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. default: None + + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> from paddlemetrics import RetrievalFallOut + >>> indexes = tensor([0, 0, 0, 1, 1, 1, 1]) + >>> preds = tensor([0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]) + >>> target = tensor([False, False, True, False, True, False, True]) + >>> fo = RetrievalFallOut(k=2) + >>> fo(preds, target, indexes=indexes) + tensor(0.5000) + """ + + higher_is_better = False + + def __init__( + self, + empty_target_action: str = "pos", + k: int = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + empty_target_action=empty_target_action, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + if (k is not None) and not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + self.k = k + + def compute(self) -> Tensor: + """First concat state `indexes`, `preds` and `target` since they were stored as lists. + + After that, compute list of groups that will help in keeping together predictions about the same query. Finally, + for each group compute the `_metric` if the number of negative targets is at least 1, otherwise behave as + specified by `self.empty_target_action`. 
+ """ + indexes = B.cat(self.indexes, dim=0) + preds = B.cat(self.preds, dim=0) + target = B.cat(self.target, dim=0) + + res = [] + groups = get_group_indexes(indexes) + + for group in groups: + mini_preds = preds[group] + mini_target = target[group] + + if not (1 - mini_target).sum(): + if self.empty_target_action == "error": + raise ValueError("`compute` method was provided with a query with no negative target.") + if self.empty_target_action == "pos": + res.append(tensor(1.0)) + elif self.empty_target_action == "neg": + res.append(tensor(0.0)) + else: + # ensure list containt only float tensors + res.append(self._metric(mini_preds, mini_target)) + + return B.stack([x.to(preds) for x in res]).mean() if res else tensor(0.0).to(preds) + + def _metric(self, preds: Tensor, target: Tensor) -> Tensor: + return retrieval_fall_out(preds, target, k=self.k) diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_hit_rate.py b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_hit_rate.py new file mode 100644 index 00000000..6a053b7b --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_hit_rate.py @@ -0,0 +1,98 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.retrieval.hit_rate import retrieval_hit_rate +from paddlemetrics.retrieval.retrieval_metric import RetrievalMetric + + +class RetrievalHitRate(RetrievalMetric): + """Computes `IR HitRate`. + + Works with binary target data. Accepts float predictions from a model output. + + Forward accepts: + + - ``preds`` (float tensor): ``(N, ...)`` + - ``target`` (long or bool tensor): ``(N, ...)`` + - ``indexes`` (long tensor): ``(N, ...)`` + + ``indexes``, ``preds`` and ``target`` must have the same dimension. + ``indexes`` indicate to which query a prediction belongs. + Predictions will be first grouped by ``indexes`` and then the `Hit Rate` will be computed as the mean + of the `Hit Rate` over each query. + + Args: + empty_target_action: + Specify what to do with queries that do not have at least a positive ``target``. Choose from: + + - ``'neg'``: those queries count as ``0.0`` (default) + - ``'pos'``: those queries count as ``1.0`` + - ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned + - ``'error'``: raise a ``ValueError`` + + k: consider only the top k elements for each query (default: None, which considers them all) + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects + the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. 
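Fall-out is the mirror image of recall: the share of non-relevant documents that end up in the top-k of each query, which is why `empty_target_action` defaults to `'pos'` here and the guard in `compute()` checks for at least one negative target. Reproducing the doctest by hand:

```python
indexes = [0, 0, 0, 1, 1, 1, 1]
preds   = [0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]
target  = [False, False, True, False, True, False, True]
k = 2

scores = []
for q in sorted(set(indexes)):
    group = [(p, t) for i, p, t in zip(indexes, preds, target) if i == q]
    negatives = sum(1 for _, t in group if not t)
    topk = sorted(group, key=lambda pt: pt[0], reverse=True)[:k]
    scores.append(sum(1 for _, t in topk if not t) / negatives)

print(scores, sum(scores) / len(scores))   # [0.5, 0.5] -> 0.5, as in the doctest
```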
When `None`, DDP + will be used to perform the allgather. default: None + + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> from paddlemetrics import RetrievalHitRate + >>> indexes = tensor([0, 0, 0, 1, 1, 1, 1]) + >>> preds = tensor([0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]) + >>> target = tensor([True, False, False, False, True, False, True]) + >>> hr2 = RetrievalHitRate(k=2) + >>> hr2(preds, target, indexes=indexes) + tensor(0.5000) + """ + + higher_is_better = True + + def __init__( + self, + empty_target_action: str = "neg", + k: int = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + empty_target_action=empty_target_action, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + if (k is not None) and not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + self.k = k + + def _metric(self, preds: Tensor, target: Tensor) -> Tensor: + return retrieval_hit_rate(preds, target, k=self.k) diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_metric.py b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_metric.py new file mode 100644 index 00000000..ab43876f --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_metric.py @@ -0,0 +1,147 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from abc import ABC, abstractmethod +from typing import Any, Callable, List, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics import Metric +from paddlemetrics.utilities.checks import _check_retrieval_inputs +from paddlemetrics.utilities.data import get_group_indexes + +#: get_group_indexes is used to group predictions belonging to the same document + + +class RetrievalMetric(Metric): + """Works with binary target data. Accepts float predictions from a model output. + + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` + - ``target`` (long or bool tensor): ``(N, ...)`` + - ``indexes`` (long tensor): ``(N, ...)`` + + `indexes`, `preds` and `target` must have the same dimension and will be flatten + to single dimension once provided. + + `indexes` indicate to which query a prediction belongs. + Predictions will be first grouped by indexes. Then the + real metric, defined by overriding the `_metric` method, + will be computed as the mean of the scores over each query. + + Args: + empty_target_action: + Specify what to do with queries that do not have at least a positive + or negative (depend on metric) target. 
Choose from: + + - ``'neg'``: those queries count as ``0.0`` (default) + - ``'pos'``: those queries count as ``1.0`` + - ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned + - ``'error'``: raise a ``ValueError`` + + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects + the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. default: None + """ + + indexes: List[Tensor] + preds: List[Tensor] + target: List[Tensor] + higher_is_better = True + + def __init__( + self, + empty_target_action: str = "neg", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + self.allow_non_binary_target = False + + empty_target_action_options = ("error", "skip", "neg", "pos") + if empty_target_action not in empty_target_action_options: + raise ValueError(f"Argument `empty_target_action` received a wrong value `{empty_target_action}`.") + + self.empty_target_action = empty_target_action + + self.add_state("indexes", default=[], dist_reduce_fx=None) + self.add_state("preds", default=[], dist_reduce_fx=None) + self.add_state("target", default=[], dist_reduce_fx=None) + + def update(self, preds: Tensor, target: Tensor, indexes: Tensor) -> None: # type: ignore + """Check shape, check and convert dtypes, flatten and add to accumulators.""" + if indexes is None: + raise ValueError("Argument `indexes` cannot be None") + + indexes, preds, target = _check_retrieval_inputs( + indexes, preds, target, allow_non_binary_target=self.allow_non_binary_target + ) + + self.indexes.append(indexes) + self.preds.append(preds) + self.target.append(target) + + def compute(self) -> Tensor: + """First concat state ``indexes``, ``preds`` and ``target`` since they were stored as lists. + + After that, compute list of groups that will help in keeping together predictions about the same query. Finally, + for each group compute the ``_metric`` if the number of positive targets is at least 1, otherwise behave as + specified by ``self.empty_target_action``. + """ + indexes = B.cat(self.indexes, dim=0) + preds = B.cat(self.preds, dim=0) + target = B.cat(self.target, dim=0) + + res = [] + groups = get_group_indexes(indexes) + + for group in groups: + mini_preds = preds[group] + mini_target = target[group] + + if not mini_target.sum(): + if self.empty_target_action == "error": + raise ValueError("`compute` method was provided with a query with no positive target.") + if self.empty_target_action == "pos": + res.append(tensor(1.0)) + elif self.empty_target_action == "neg": + res.append(tensor(0.0)) + else: + # ensure list contains only float tensors + res.append(self._metric(mini_preds, mini_target)) + + return B.stack([x.to(preds) for x in res]).mean() if res else tensor(0.0).to(preds) + + @abstractmethod + def _metric(self, preds: Tensor, target: Tensor) -> Tensor: + """Compute a metric over a predictions and target of a single group. 
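+        Concrete subclasses typically just delegate to the matching functional implementation, for
+        example ``return retrieval_precision(preds, target, k=self.k)`` in ``RetrievalPrecision``, and may
+        assume ``preds`` and ``target`` hold the already-flattened predictions and targets of one group.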
+ + This method should be overridden by subclasses. + """ diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_ndcg.py b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_ndcg.py new file mode 100644 index 00000000..bb0740ca --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_ndcg.py @@ -0,0 +1,99 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.retrieval.ndcg import retrieval_normalized_dcg +from paddlemetrics.retrieval.retrieval_metric import RetrievalMetric + + +class RetrievalNormalizedDCG(RetrievalMetric): + """Computes `Normalized Discounted Cumulative Gain`_. + + Works with binary or positive integer target data. Accepts float predictions from a model output. + + Forward accepts: + + - ``preds`` (float tensor): ``(N, ...)`` + - ``target`` (long, int, bool or float tensor): ``(N, ...)`` + - ``indexes`` (long tensor): ``(N, ...)`` + + ``indexes``, ``preds`` and ``target`` must have the same dimension. + ``indexes`` indicate to which query a prediction belongs. + Predictions will be first grouped by ``indexes`` and then `Normalized Discounted Cumulative Gain` + will be computed as the mean of the `Normalized Discounted Cumulative Gain` over each query. + + Args: + empty_target_action: + Specify what to do with queries that do not have at least a positive ``target``. Choose from: + + - ``'neg'``: those queries count as ``0.0`` (default) + - ``'pos'``: those queries count as ``1.0`` + - ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned + - ``'error'``: raise a ``ValueError`` + + k: consider only the top k elements for each query (default: None, which considers them all) + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects + the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. 
default: None + + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> from paddlemetrics import RetrievalNormalizedDCG + >>> indexes = tensor([0, 0, 0, 1, 1, 1, 1]) + >>> preds = tensor([0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]) + >>> target = tensor([False, False, True, False, True, False, True]) + >>> ndcg = RetrievalNormalizedDCG() + >>> ndcg(preds, target, indexes=indexes) + tensor(0.8467) + """ + + higher_is_better = True + + def __init__( + self, + empty_target_action: str = "neg", + k: int = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + empty_target_action=empty_target_action, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + if (k is not None) and not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + self.k = k + self.allow_non_binary_target = True + + def _metric(self, preds: Tensor, target: Tensor) -> Tensor: + return retrieval_normalized_dcg(preds, target, k=self.k) diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_precision.py b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_precision.py new file mode 100644 index 00000000..f0f983a8 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_precision.py @@ -0,0 +1,98 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.retrieval.precision import retrieval_precision +from paddlemetrics.retrieval.retrieval_metric import RetrievalMetric + + +class RetrievalPrecision(RetrievalMetric): + """Computes `IR Precision`_. + + Works with binary target data. Accepts float predictions from a model output. + + Forward accepts: + + - ``preds`` (float tensor): ``(N, ...)`` + - ``target`` (long or bool tensor): ``(N, ...)`` + - ``indexes`` (long tensor): ``(N, ...)`` + + ``indexes``, ``preds`` and ``target`` must have the same dimension. + ``indexes`` indicate to which query a prediction belongs. + Predictions will be first grouped by ``indexes`` and then `Precision` will be computed as the mean + of the `Precision` over each query. + + Args: + empty_target_action: + Specify what to do with queries that do not have at least a positive ``target``. Choose from: + + - ``'neg'``: those queries count as ``0.0`` (default) + - ``'pos'``: those queries count as ``1.0`` + - ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned + - ``'error'``: raise a ``ValueError`` + + k: consider only the top k elements for each query (default: None, which considers them all) + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. 
default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects + the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. default: None + + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> from paddlemetrics import RetrievalPrecision + >>> indexes = tensor([0, 0, 0, 1, 1, 1, 1]) + >>> preds = tensor([0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]) + >>> target = tensor([False, False, True, False, True, False, True]) + >>> p2 = RetrievalPrecision(k=2) + >>> p2(preds, target, indexes=indexes) + tensor(0.5000) + """ + + higher_is_better = True + + def __init__( + self, + empty_target_action: str = "neg", + k: int = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + empty_target_action=empty_target_action, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + if (k is not None) and not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + self.k = k + + def _metric(self, preds: Tensor, target: Tensor) -> Tensor: + return retrieval_precision(preds, target, k=self.k) diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_r_precision.py b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_r_precision.py new file mode 100644 index 00000000..75373532 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_r_precision.py @@ -0,0 +1,70 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.retrieval.r_precision import retrieval_r_precision +from paddlemetrics.retrieval.retrieval_metric import RetrievalMetric + + +class RetrievalRPrecision(RetrievalMetric): + """Computes `IR R-Precision`_. + + Works with binary target data. Accepts float predictions from a model output. + + Forward accepts: + + - ``preds`` (float tensor): ``(N, ...)`` + - ``target`` (long or bool tensor): ``(N, ...)`` + - ``indexes`` (long tensor): ``(N, ...)`` + + ``indexes``, ``preds`` and ``target`` must have the same dimension. + ``indexes`` indicate to which query a prediction belongs. + Predictions will be first grouped by ``indexes`` and then `R-Precision` will be computed as the mean + of the `R-Precision` over each query. + + Args: + empty_target_action: + Specify what to do with queries that do not have at least a positive ``target``. 
Choose from: + + - ``'neg'``: those queries count as ``0.0`` (default) + - ``'pos'``: those queries count as ``1.0`` + - ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned + - ``'error'``: raise a ``ValueError`` + + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects + the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. default: None + + Example: + >>> from paddlemetrics import RetrievalRPrecision + >>> indexes = tensor([0, 0, 0, 1, 1, 1, 1]) + >>> preds = tensor([0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]) + >>> target = tensor([False, False, True, False, True, False, True]) + >>> p2 = RetrievalRPrecision() + >>> p2(preds, target, indexes=indexes) + tensor(0.7500) + """ + + higher_is_better = True + + def _metric(self, preds: Tensor, target: Tensor) -> Tensor: + return retrieval_r_precision(preds, target) diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_recall.py b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_recall.py new file mode 100644 index 00000000..26ace51c --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_recall.py @@ -0,0 +1,98 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.retrieval.recall import retrieval_recall +from paddlemetrics.retrieval.retrieval_metric import RetrievalMetric + + +class RetrievalRecall(RetrievalMetric): + """Computes `IR Recall`_. + + Works with binary target data. Accepts float predictions from a model output. + + Forward accepts: + + - ``preds`` (float tensor): ``(N, ...)`` + - ``target`` (long or bool tensor): ``(N, ...)`` + - ``indexes`` (long tensor): ``(N, ...)`` + + ``indexes``, ``preds`` and ``target`` must have the same dimension. + ``indexes`` indicate to which query a prediction belongs. + Predictions will be first grouped by ``indexes`` and then `Recall` will be computed as the mean + of the `Recall` over each query. + + Args: + empty_target_action: + Specify what to do with queries that do not have at least a positive ``target``. Choose from: + + - ``'neg'``: those queries count as ``0.0`` (default) + - ``'pos'``: those queries count as ``1.0`` + - ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned + - ``'error'``: raise a ``ValueError`` + + k: consider only the top k elements for each query (default: None, which considers them all) + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. 
default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects + the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. default: None + + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> from paddlemetrics import RetrievalRecall + >>> indexes = tensor([0, 0, 0, 1, 1, 1, 1]) + >>> preds = tensor([0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]) + >>> target = tensor([False, False, True, False, True, False, True]) + >>> r2 = RetrievalRecall(k=2) + >>> r2(preds, target, indexes=indexes) + tensor(0.7500) + """ + + higher_is_better = True + + def __init__( + self, + empty_target_action: str = "neg", + k: int = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + empty_target_action=empty_target_action, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + if (k is not None) and not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + self.k = k + + def _metric(self, preds: Tensor, target: Tensor) -> Tensor: + return retrieval_recall(preds, target, k=self.k) diff --git a/RE/paddlemetric/src/paddlemetrics/setup_tools.py b/RE/paddlemetric/src/paddlemetrics/setup_tools.py new file mode 100644 index 00000000..e3233cef --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/setup_tools.py @@ -0,0 +1,74 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import re +from typing import List + +_PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__)) + + +def _load_requirements(path_dir: str, file_name: str = "requirements.txt", comment_char: str = "#") -> List[str]: + """Load requirements from a file. + + >>> _load_requirements(_PROJECT_ROOT) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + ['numpy...', 'B...'] + """ + with open(os.path.join(path_dir, file_name)) as file: + lines = [ln.strip() for ln in file.readlines()] + reqs = [] + for ln in lines: + # filer all comments + if comment_char in ln: + ln = ln[: ln.index(comment_char)].strip() + # skip directly installed dependencies + if ln.startswith("http"): + continue + if ln: # if requirement is not empty + reqs.append(ln) + return reqs + + +def _load_readme_description(path_dir: str, homepage: str, version: str) -> str: + """Load readme as decribtion. + + >>> _load_readme_description(_PROJECT_ROOT, "", "") # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + '
...' + """ + path_readme = os.path.join(path_dir, "README.md") + with open(path_readme, encoding="utf-8") as fp: + text = fp.read() + + # https://github.com/PyTorchLightning/paddlemetrics/raw/master/docs/source/_static/images/lightning_module/pt_to_pl.png + github_source_url = os.path.join(homepage, "raw", version) + # replace relative repository path to absolute link to the release + # do not replace all "docs" as in the readme we reger some other sources with particular path to docs + text = text.replace("docs/source/_static/", f"{os.path.join(github_source_url, 'docs/source/_static/')}") + + # readthedocs badge + text = text.replace("badge/?version=stable", f"badge/?version={version}") + text = text.replace("paddlemetrics.readthedocs.io/en/stable/", f"paddlemetrics.readthedocs.io/en/{version}") + # codecov badge + text = text.replace("/branch/master/graph/badge.svg", f"/release/{version}/graph/badge.svg") + # replace github badges for release ones + text = text.replace("badge.svg?branch=master&event=push", f"badge.svg?tag={version}") + # Azure... + text = text.replace("?branchName=master", f"?branchName=refs%2Ftags%2F{version}") + text = re.sub(r"\?definitionId=\d+&branchName=master", f"?definitionId=2&branchName=refs%2Ftags%2F{version}", text) + + skip_begin = r"" + skip_end = r"" + # todo: wrap content as commented description + text = re.sub(rf"{skip_begin}.+?{skip_end}", "", text, flags=re.IGNORECASE + re.DOTALL) + + return text diff --git a/RE/paddlemetric/src/paddlemetrics/text/__init__.py b/RE/paddlemetric/src/paddlemetrics/text/__init__.py new file mode 100644 index 00000000..782ca295 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/text/__init__.py @@ -0,0 +1,18 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#from paddlemetrics.text.bert import BERTScore # noqa: F401 +from paddlemetrics.text.bleu import BLEUScore # noqa: F401 +from paddlemetrics.text.rouge import ROUGEScore # noqa: F401 +from paddlemetrics.text.sacre_bleu import SacreBLEUScore # noqa: F401 +from paddlemetrics.text.wer import WER # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/text/bert.py b/RE/paddlemetric/src/paddlemetrics/text/bert.py new file mode 100644 index 00000000..0f602f30 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/text/bert.py @@ -0,0 +1,251 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
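Before the individual text-metric implementations that follow, a minimal usage sketch of the metrics re-exported by `paddlemetrics/text/__init__.py` (illustrative only, not part of this diff; it assumes `paddlemetrics` is importable and reuses the values from the doctests below):

```python
# Illustrative sketch: the text metrics share the stateful Metric interface used by the
# retrieval metrics above -- update() accumulates batches, compute() aggregates them.
from paddlemetrics.text import BLEUScore, WER

bleu = BLEUScore(n_gram=4)
reference_corpus = [["there is a cat on the mat".split(), "a cat is on the mat".split()]]
translate_corpus = ["the cat is on the mat".split()]
bleu.update(reference_corpus, translate_corpus)  # note the order: references first, translations second
print(bleu.compute())                            # tensor(0.7598), as in the BLEUScore doctest

wer = WER()
wer.update(["this is the prediction"], ["this is the reference"])  # WER takes predictions first
print(wer.compute())                             # 1 substitution over 4 reference words -> tensor(0.2500)
```

`ROUGEScore` is exported from the same package but additionally requires `nltk` when stemming or `rougeLsum` keys are used.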
+import warnings +from typing import Any, Callable, Dict, List, Optional, Union + +import paddleext.torchapi as B + +from paddlemetrics.functional import bert_score +from paddlemetrics.functional.text.bert import _preprocess_text +from paddlemetrics.metric import Metric +from paddlemetrics.utilities.imports import _TRANSFORMERS_AVAILABLE + +if _TRANSFORMERS_AVAILABLE: + from transformers import AutoTokenizer + + +# Default model recommended in the original implementation. +_DEFAULT_MODEL = "roberta-large" + + +def _concatenate(d: Dict[str, List[B.Tensor]]) -> Dict[str, B.Tensor]: + """Concatenate list of tensors within a given dictionary.""" + output_dict: Dict[str, B.Tensor] = {} + for k, v in d.items(): + output_dict[k] = B.cat(v) + return output_dict + + +class BERTScore(Metric): + """`Bert_score Evaluating Text Generation`_ leverages the pre-trained contextual embeddings from BERT and + matches words in candidate and reference sentences by cosine similarity. It has been shown to correlate with + human judgment on sentence-level and system-level evaluation. Moreover, BERTScore computes precision, recall, + and F1 measure, which can be useful for evaluating different language generation tasks. + + This implemenation follows the original implementation from `BERT_score`_. + + Args: + predictions: + An iterable of predicted sentences. + references: + An iterable of target sentences. + model_type: + A name or a model path used to load `transformers` pretrained model. + num_layers: + A layer of representation to use. + all_layers: + An indication of whether the representation from all model's layers should be used. + If `all_layers = True`, the argument `num_layers` is ignored. + model: + A user's own model. Must be of `B.nn.Module` instance. + user_tokenizer: + A user's own tokenizer used with the own model. This must be an instance with the `__call__` method. + This method must take an iterable of sentences (`List[str]`) and must return a python dictionary + containing `"input_ids"` and `"attention_mask"` represented by `B.Tensor`. It is up to the user's model + of whether `"input_ids"` is a `B.Tensor` of input ids or embedding vectors. + This tokenizer must prepend an equivalent of `[CLS]` token and append an equivalent of `[SEP]` token + as `transformers` tokenizer does. + user_forward_fn: + A user's own forward function used in a combination with `user_model`. This function must take `user_model` + and a python dictionary of containing `"input_ids"` and `"attention_mask"` represented by `B.Tensor` + as an input and return the model's output represented by the single `B.Tensor`. + verbose: + An indication of whether a progress bar to be displayed during the embeddings calculation. + idf: + An indication whether normalization using inverse document frequencies should be used. + device: + A device to be used for calculation. + max_length: + A maximum length of input sequences. Sequences longer than `max_length` are to be trimmed. + batch_size: + A batch size used for model processing. + num_threads: + A number of threads to use for a dataloader. + return_hash: + An indication of whether the correspodning `hash_code` should be returned. + lang: + A language of input sentences. + rescale_with_baseline: + An indication of whether bertscore should be rescaled with a pre-computed baseline. + When a pretrained model from `transformers` model is used, the corresponding baseline is downloaded + from the original `bert-score` package from `BERT_score`_ if available. 
+ In other cases, please specify a path to the baseline csv/tsv file, which must follow the formatting + of the files from `BERT_score`_. + baseline_path: + A path to the user's own local csv/tsv file with the baseline scale. + baseline_url: + A url path to the user's own csv/tsv file with the baseline scale. + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather + + Returns: + Python dictionary containing the keys `precision`, `recall` and `f1` with corresponding values. + + Example: + >>> predictions = ["hello there", "general kenobi"] + >>> references = ["hello there", "master kenobi"] + >>> bertscore = BERTScore() + >>> bertscore.update(predictions=predictions,references=references) + >>> bertscore.compute() # doctest: +SKIP + {'precision': [0.99..., 0.99...], + 'recall': [0.99..., 0.99...], + 'f1': [0.99..., 0.99...]} + """ + + higher_is_better = True + + def __init__( + self, + model_name_or_path: Optional[str] = None, + num_layers: Optional[int] = None, + all_layers: bool = False, + model: Optional[B.nn.Module] = None, + user_tokenizer: Optional[Any] = None, + user_forward_fn: Callable[[B.nn.Module, Dict[str, B.Tensor]], B.Tensor] = None, + verbose: bool = False, + idf: bool = False, + device: Optional[Union[str, B.device]] = None, + max_length: int = 512, + batch_size: int = 64, + num_threads: int = 4, + return_hash: bool = False, + lang: str = "en", + rescale_with_baseline: bool = False, + baseline_path: Optional[str] = None, + baseline_url: Optional[str] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ): + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + self.model_name_or_path = model_name_or_path + self.num_layers = num_layers + self.all_layers = all_layers + self.model = model + self.user_forward_fn = user_forward_fn + self.verbose = verbose + self.idf = idf + self.embedding_device = device + self.max_length = max_length + self.batch_size = batch_size + self.num_threads = num_threads + self.return_hash = return_hash + self.lang = lang + self.rescale_with_baseline = rescale_with_baseline + self.baseline_path = baseline_path + self.baseline_url = baseline_url + self.predictions: Dict[str, List[B.Tensor]] = {"input_ids": [], "attention_mask": []} + self.references: Dict[str, List[B.Tensor]] = {"input_ids": [], "attention_mask": []} + + if user_tokenizer: + self.tokenizer = user_tokenizer + self.user_tokenizer = True + else: + if not _TRANSFORMERS_AVAILABLE: + raise ValueError( + "`BERTScore` metric with default tokenizers requires `transformers` package be installed. " + "Either install with `pip install transformers>=4.0` or `pip install paddlemetrics[text]`" + ) + if not model_name_or_path: + model_name_or_path = _DEFAULT_MODEL + warnings.warn( + "The argument `model_name_or_path` was not specified while it is required when default " + " `transformers` model are used." 
+ f"It is, therefore, used the default recommended model - {_DEFAULT_MODEL}." + ) + self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) + self.user_tokenizer = False + + def update(self, predictions: List[str], references: List[str]) -> None: # type: ignore + """Store predictions/references for computing BERT scores. It is necessary to store sentences in a + tokenized form to ensure the DDP mode working. + + Args: + predictions: + An iterable of predicted sentences. + references: + An iterable of predicted sentences. + """ + predictions_dict = _preprocess_text( + predictions, + self.tokenizer, + self.max_length, + truncation=False, + sort_according_length=False, + own_tokenizer=self.user_tokenizer, + ) + references_dict = _preprocess_text( + references, + self.tokenizer, + self.max_length, + truncation=False, + sort_according_length=False, + own_tokenizer=self.user_tokenizer, + ) + self.predictions["input_ids"].append(predictions_dict["input_ids"]) + self.predictions["attention_mask"].append(predictions_dict["attention_mask"]) + self.references["input_ids"].append(references_dict["input_ids"]) + self.references["attention_mask"].append(references_dict["attention_mask"]) + + def compute(self) -> Dict[str, Union[List[float], str]]: + """Calculate BERT scores. + + Return: + Python dictionary containing the keys `precision`, `recall` and `f1` with corresponding values. + """ + return bert_score( + predictions=_concatenate(self.predictions), + references=_concatenate(self.references), + model_name_or_path=self.model_name_or_path, + num_layers=self.num_layers, + all_layers=self.all_layers, + model=self.model, + user_tokenizer=self.tokenizer if self.user_tokenizer else None, + user_forward_fn=self.user_forward_fn, + verbose=self.verbose, + idf=self.idf, + device=self.embedding_device, + max_length=self.max_length, + batch_size=self.batch_size, + num_threads=self.num_threads, + return_hash=self.return_hash, + lang=self.lang, + rescale_with_baseline=self.rescale_with_baseline, + baseline_path=self.baseline_path, + baseline_url=self.baseline_url, + ) diff --git a/RE/paddlemetric/src/paddlemetrics/text/bleu.py b/RE/paddlemetric/src/paddlemetrics/text/bleu.py new file mode 100644 index 00000000..46937d98 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/text/bleu.py @@ -0,0 +1,120 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# referenced from +# Library Name: torchtext +# Authors: torchtext authors and @sluks +# Date: 2020-07-18 +# Link: https://pyB.org/text/_modules/torchtext/data/metrics.html#bleu_score +from typing import Any, Callable, Optional, Sequence + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics import Metric +from paddlemetrics.functional.text.bleu import _bleu_score_compute, _bleu_score_update + + +class BLEUScore(Metric): + """Calculate `BLEU score`_ of machine translated text with one or more references. 
+ + Args: + n_gram: + Gram value ranged from 1 to 4 (Default 4) + smooth: + Whether or not to apply smoothing – see [2] + compute_on_step: + Forward only calls ``update()`` and returns None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. + + Example: + >>> translate_corpus = ['the cat is on the mat'.split()] + >>> reference_corpus = [['there is a cat on the mat'.split(), 'a cat is on the mat'.split()]] + >>> metric = BLEUScore() + >>> metric(reference_corpus, translate_corpus) + tensor(0.7598) + + References: + [1] BLEU: a Method for Automatic Evaluation of Machine Translation by Papineni, + Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu `BLEU`_ + + [2] Automatic Evaluation of Machine Translation Quality Using Longest Common Subsequence + and Skip-Bigram Statistics by Chin-Yew Lin and Franz Josef Och `Machine Translation Evolution`_ + """ + + is_differentiable = False + higher_is_better = True + trans_len: Tensor + ref_len: Tensor + numerator: Tensor + denominator: Tensor + + def __init__( + self, + n_gram: int = 4, + smooth: bool = False, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable] = None, + ): + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.n_gram = n_gram + self.smooth = smooth + + self.add_state("trans_len", tensor(0, dtype=B.float), dist_reduce_fx="sum") + self.add_state("ref_len", tensor(0, dtype=B.float), dist_reduce_fx="sum") + self.add_state("numerator", B.zeros(self.n_gram), dist_reduce_fx="sum") + self.add_state("denominator", B.zeros(self.n_gram), dist_reduce_fx="sum") + + def update( # type: ignore + self, reference_corpus: Sequence[Sequence[Sequence[str]]], translate_corpus: Sequence[Sequence[str]] + ) -> None: + """Compute Precision Scores. + + Args: + reference_corpus: An iterable of iterables of reference corpus + translate_corpus: An iterable of machine translated corpus + """ + self.trans_len, self.ref_len = _bleu_score_update( + reference_corpus, + translate_corpus, + self.numerator, + self.denominator, + self.trans_len, + self.ref_len, + self.n_gram, + ) + + def compute(self) -> Tensor: + """Calculate BLEU score. + + Return: + Tensor with BLEU Score + """ + return _bleu_score_compute( + self.trans_len, self.ref_len, self.numerator, self.denominator, self.n_gram, self.smooth + ) diff --git a/RE/paddlemetric/src/paddlemetrics/text/rouge.py b/RE/paddlemetric/src/paddlemetrics/text/rouge.py new file mode 100644 index 00000000..254f366d --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/text/rouge.py @@ -0,0 +1,171 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import warnings +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +from paddleext.torchapi import Tensor + +from paddlemetrics import Metric +from paddlemetrics.functional.text.rouge import ALLOWED_ROUGE_KEYS, _rouge_score_compute, _rouge_score_update +from paddlemetrics.utilities.imports import _NLTK_AVAILABLE + + +class ROUGEScore(Metric): + """`Calculate Rouge Score`_, used for automatic summarization. This implementation should imitate the behaviour + of the `rouge-score` package `Python ROUGE Implementation` + + Args: + newline_sep: + New line separate the inputs. + This argument has not been in use any more. It is deprecated in v0.6 and will be removed in v0.7. + use_stemmer: + Use Porter stemmer to strip word suffixes to improve matching. + rouge_keys: + A list of rouge types to calculate. + Keys that are allowed are ``rougeL``, ``rougeLsum``, and ``rouge1`` through ``rouge9``. + decimal_places: + The number of digits to round the computed the values to. + This argument has not been in usd any more. It is deprecated in v0.6 and will be removed in v0.7. + compute_on_step: + Forward only calls ``update()`` and returns None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. + + Example: + + >>> targets = "Is your name John".split() + >>> preds = "My name is John".split() + >>> rouge = ROUGEScore() # doctest: +SKIP + >>> from pprint import pprint + >>> pprint(rouge(preds, targets)) # doctest: +NORMALIZE_WHITESPACE +SKIP + {'rouge1_fmeasure': 0.25, + 'rouge1_precision': 0.25, + 'rouge1_recall': 0.25, + 'rouge2_fmeasure': 0.0, + 'rouge2_precision': 0.0, + 'rouge2_recall': 0.0, + 'rougeL_fmeasure': 0.25, + 'rougeL_precision': 0.25, + 'rougeL_recall': 0.25, + 'rougeLsum_fmeasure': 0.25, + 'rougeLsum_precision': 0.25, + 'rougeLsum_recall': 0.25} + + Raises: + ValueError: + If the python packages ``nltk`` is not installed. + ValueError: + If any of the ``rouge_keys`` does not belong to the allowed set of keys. 
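+
+    Note:
+        ``preds`` and ``targets`` given to ``update`` may each be a single string or a list of strings;
+        a single string is wrapped into a one-element list before scoring.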
+ + References: + [1] ROUGE: A Package for Automatic Evaluation of Summaries by Chin-Yew Lin `Rouge Detail`_ + """ + + higher_is_better = True + + def __init__( + self, + newline_sep: Optional[bool] = None, # remove in v0.7 + use_stemmer: bool = False, + rouge_keys: Union[str, Tuple[str, ...]] = ("rouge1", "rouge2", "rougeL", "rougeLsum"), # type: ignore + decimal_places: Optional[bool] = None, # remove in v0.7 + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable] = None, + ): + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + if newline_sep is not None: + warnings.warn("Argument `newline_sep` is deprecated in v0.6 and will be removed in v0.7") + if decimal_places is not None: + warnings.warn("Argument `decimal_places` is deprecated in v0.6 and will be removed in v0.7") + + if use_stemmer or "rougeLsum" in rouge_keys: + if not _NLTK_AVAILABLE: + raise ValueError("Stemmer and/or `rougeLsum` requires that nltk is installed. Use `pip install nltk`.") + import nltk + + if not isinstance(rouge_keys, tuple): + rouge_keys = tuple([rouge_keys]) + for key in rouge_keys: + if key not in ALLOWED_ROUGE_KEYS: + raise ValueError(f"Got unknown rouge key {key}. Expected to be one of {ALLOWED_ROUGE_KEYS}") + + self.rouge_keys = rouge_keys + self.rouge_keys_values = [ALLOWED_ROUGE_KEYS[key] for key in rouge_keys] + self.stemmer = nltk.stem.porter.PorterStemmer() if use_stemmer else None + + # Adding stated dynamically to prevent IndexError during sync function as some lists can be empty. + for rouge_key in self.rouge_keys: + for score in ["fmeasure", "precision", "recall"]: + self.add_state(f"{rouge_key}_{score}", [], dist_reduce_fx=None) + + def update(self, preds: Union[str, List[str]], targets: Union[str, List[str]]) -> None: # type: ignore + """Compute rouge scores. + + Args: + preds: An iterable of predicted sentences. + targets: An iterable of target sentences. + """ + + if isinstance(preds, str): + preds = [preds] + + if isinstance(targets, str): + targets = [targets] + + output: Dict[Union[int, str], List[Dict[str, Tensor]]] = _rouge_score_update( + preds, targets, self.rouge_keys_values, stemmer=self.stemmer + ) + for rouge_key, metrics in output.items(): + for metric in metrics: + for type, value in metric.items(): + getattr(self, f"rouge{rouge_key}_{type}").append(value.to(self.device)) + + def compute(self) -> Dict[str, Tensor]: + """Calculate (Aggregate and provide confidence intervals) ROUGE score. + + Return: + Python dictionary of rouge scores for each input rouge key. + """ + update_output = {} + for rouge_key in self.rouge_keys_values: + for type in ["fmeasure", "precision", "recall"]: + update_output[f"rouge{rouge_key}_{type}"] = getattr(self, f"rouge{rouge_key}_{type}") + + return _rouge_score_compute(update_output) + + def __hash__(self) -> int: + # override to hash list objects. + # this is a bug in the upstream pytorch release. 
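+        # The per-key states registered in __init__ are Python lists, which are unhashable,
+        # so the default Metric.__hash__ would fail here; any list-valued state is therefore
+        # converted to a tuple before the combined hash is taken.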
+ hash_vals = [self.__class__.__name__] + + for key in self._defaults: + value = getattr(self, key) + if isinstance(value, list): + value = tuple(value) + hash_vals.append(value) + + return hash(tuple(hash_vals)) diff --git a/RE/paddlemetric/src/paddlemetrics/text/sacre_bleu.py b/RE/paddlemetric/src/paddlemetrics/text/sacre_bleu.py new file mode 100644 index 00000000..4f4d99e8 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/text/sacre_bleu.py @@ -0,0 +1,134 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# referenced from +# Library Name: torchtext +# Authors: torchtext authors and @sluks +# Date: 2020-07-18 +# Link: https://pyB.org/text/_modules/torchtext/data/metrics.html#bleu_score +from typing import Any, Callable, Optional, Sequence + +from typing_extensions import Literal + +from paddlemetrics.functional.text.bleu import _bleu_score_update +from paddlemetrics.functional.text.sacre_bleu import _SacreBLEUTokenizer +from paddlemetrics.text.bleu import BLEUScore +from paddlemetrics.utilities.imports import _REGEX_AVAILABLE + +AVAILABLE_TOKENIZERS = ("none", "13a", "zh", "intl", "char") + + +class SacreBLEUScore(BLEUScore): + """Calculate `BLEU score`_ [1] of machine translated text with one or more references. This implementation + follows the behaviour of SacreBLEU [2] implementation from https://github.com/mjpost/sacrebleu. + + The SacreBLEU implementation differs from the NLTK BLEU implementation in tokenization techniques. + + Args: + n_gram: + Gram value ranged from 1 to 4 (Default 4) + smooth: + Whether or not to apply smoothing – see [2] + tokenize: + Tokenization technique to be used. (Default '13a') + Supported tokenization: ['none', '13a', 'zh', 'intl', 'char'] + lowercase: + If ``True``, BLEU score over lowercased text is calculated. + compute_on_step: + Forward only calls ``update()`` and returns None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. + + Raises: + ValueError: + If ``tokenize`` not one of 'none', '13a', 'zh', 'intl' or 'char' + ValueError: + If ``tokenize`` is set to 'intl' and `regex` is not installed + + + Example: + >>> translate_corpus = ['the cat is on the mat'] + >>> reference_corpus = [['there is a cat on the mat', 'a cat is on the mat']] + >>> metric = SacreBLEUScore() + >>> metric(reference_corpus, translate_corpus) + tensor(0.7598) + + References: + [1] BLEU: a Method for Automatic Evaluation of Machine Translation by Papineni, + Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu `BLEU`_ + + [2] A Call for Clarity in Reporting BLEU Scores by Matt Post. 
+ + [3] Automatic Evaluation of Machine Translation Quality Using Longest Common Subsequence + and Skip-Bigram Statistics by Chin-Yew Lin and Franz Josef Och `Machine Translation Evolution`_ + """ + + def __init__( + self, + n_gram: int = 4, + smooth: bool = False, + tokenize: Literal["none", "13a", "zh", "intl", "char"] = "13a", + lowercase: bool = False, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable] = None, + ): + super().__init__( + n_gram=n_gram, + smooth=smooth, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + if tokenize not in AVAILABLE_TOKENIZERS: + raise ValueError(f"Argument `tokenize` expected to be one of {AVAILABLE_TOKENIZERS} but got {tokenize}.") + + if tokenize == "intl" and not _REGEX_AVAILABLE: + raise ValueError( + "`'intl'` tokenization requires `regex` installed. Use `pip install regex` or `pip install " + "paddlemetrics[text]`." + ) + self.tokenizer = _SacreBLEUTokenizer(tokenize, lowercase) + + def update( # type: ignore + self, reference_corpus: Sequence[Sequence[str]], translate_corpus: Sequence[str] + ) -> None: + """Compute Precision Scores. + + Args: + reference_corpus: An iterable of iterables of reference corpus + translate_corpus: An iterable of machine translated corpus + """ + reference_corpus_: Sequence[Sequence[Sequence[str]]] = [ + [self.tokenizer(line) for line in reference] for reference in reference_corpus + ] + translate_corpus_: Sequence[Sequence[str]] = [self.tokenizer(line) for line in translate_corpus] + + self.trans_len, self.ref_len = _bleu_score_update( + reference_corpus_, + translate_corpus_, + self.numerator, + self.denominator, + self.trans_len, + self.ref_len, + self.n_gram, + ) diff --git a/RE/paddlemetric/src/paddlemetrics/text/wer.py b/RE/paddlemetric/src/paddlemetrics/text/wer.py new file mode 100644 index 00000000..7bb69740 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/text/wer.py @@ -0,0 +1,109 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, Callable, List, Optional, Union +from warnings import warn + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.text.wer import _wer_compute, _wer_update +from paddlemetrics.metric import Metric + + +class WER(Metric): + r""" + Word error rate (WER_) is a common metric of the performance of an automatic speech recognition system. + This value indicates the percentage of words that were incorrectly predicted. + The lower the value, the better the performance of the ASR system with a WER of 0 being a perfect score. + Word error rate can then be computed as: + + .. 
math:: + WER = \frac{S + D + I}{N} = \frac{S + D + I}{S + D + C} + + where: + - S is the number of substitutions, + - D is the number of deletions, + - I is the number of insertions, + - C is the number of correct words, + - N is the number of words in the reference (N=S+D+C). + + Compute WER score of transcribed segments against references. + + Args: + concatenate_texts: Whether to concatenate all input texts or compute WER iteratively. + This argument is deprecated in v0.6 and it will be removed in v0.7. + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather + + Returns: + (Tensor) Word error rate + + Examples: + >>> predictions = ["this is the prediction", "there is an other sample"] + >>> references = ["this is the reference", "there is another one"] + >>> metric = WER() + >>> metric(predictions, references) + tensor(0.5000) + """ + is_differentiable = False + higher_is_better = False + error: Tensor + total: Tensor + + def __init__( + self, + concatenate_texts: Optional[bool] = None, # TODO: remove in v0.7 + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ): + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + if concatenate_texts is not None: + warn("`concatenate_texts` has been deprecated in v0.6 and it will be removed in v0.7", DeprecationWarning) + self.add_state("errors", tensor(0, dtype=B.float), dist_reduce_fx="sum") + self.add_state("total", tensor(0, dtype=B.float), dist_reduce_fx="sum") + + def update(self, predictions: Union[str, List[str]], references: Union[str, List[str]]) -> None: # type: ignore + """Store references/predictions for computing Word Error Rate scores. + + Args: + predictions: Transcription(s) to score as a string or list of strings + references: Reference(s) for each speech input as a string or list of strings + """ + errors, total = _wer_update(predictions, references) + self.errors += errors + self.total += total + + def compute(self) -> Tensor: + """Calculate the word error rate. + + Returns: + (Tensor) Word error rate + """ + return _wer_compute(self.errors, self.total) diff --git a/RE/paddlemetric/src/paddlemetrics/utilities/__init__.py b/RE/paddlemetric/src/paddlemetrics/utilities/__init__.py new file mode 100644 index 00000000..b1b2a506 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/utilities/__init__.py @@ -0,0 +1,3 @@ +from paddlemetrics.utilities.data import apply_to_collection # noqa: F401 +from paddlemetrics.utilities.distributed import class_reduce, reduce # noqa: F401 +from paddlemetrics.utilities.prints import rank_zero_debug, rank_zero_info, rank_zero_warn # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/utilities/checks.py b/RE/paddlemetric/src/paddlemetrics/utilities/checks.py new file mode 100644 index 00000000..b948f103 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/utilities/checks.py @@ -0,0 +1,582 @@ +# Copyright The PyTorch Lightning team. 
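As a quick, illustrative sanity check of the WER doctest above (not part of this diff): the first pair contributes one substitution over a four-word reference, and the second pair ("there is an other sample" vs. "there is another one") contributes three word-level edits over a four-word reference:

```python
# Illustrative check of the WER doctest values: WER = (S + D + I) / N over all references.
errors, total = 1 + 3, 4 + 4
print(errors / total)  # 0.5, matching tensor(0.5000)
```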
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional, Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.data import select_topk, to_onehot +from paddlemetrics.utilities.enums import DataType + + +def _check_same_shape(preds: Tensor, target: Tensor) -> None: + """Check that predictions and target have the same shape, else raise error.""" + if preds.shape != target.shape: + raise RuntimeError("Predictions and targets are expected to have the same shape") + + +def _basic_input_validation(preds: Tensor, target: Tensor, threshold: float, multiclass: Optional[bool]) -> None: + """Perform basic validation of inputs that does not require deducing any information of the type of inputs.""" + + if target.is_floating_point(): + raise ValueError("The `target` has to be an integer tensor.") + if target.min() < 0: + raise ValueError("The `target` has to be a non-negative tensor.") + + preds_float = preds.is_floating_point() + if not preds_float and preds.min() < 0: + raise ValueError("If `preds` are integers, they have to be non-negative.") + + if not preds.shape[0] == target.shape[0]: + raise ValueError("The `preds` and `target` should have the same first dimension.") + + if multiclass is False and target.max() > 1: + raise ValueError("If you set `multiclass=False`, then `target` should not exceed 1.") + + if multiclass is False and not preds_float and preds.max() > 1: + raise ValueError("If you set `multiclass=False` and `preds` are integers, then `preds` should not exceed 1.") + + +def _check_shape_and_type_consistency(preds: Tensor, target: Tensor) -> Tuple[DataType, int]: + """This checks that the shape and type of inputs are consistent with each other and fall into one of the + allowed input types (see the documentation of docstring of ``_input_format_classification``). It does not check + for consistency of number of classes, other functions take care of that. + + It returns the name of the case in which the inputs fall, and the implied number of classes (from the ``C`` dim for + multi-class data, or extra dim(s) for multi-label data). + """ + + preds_float = preds.is_floating_point() + + if preds.ndim == target.ndim: + if preds.shape != target.shape: + raise ValueError( + "The `preds` and `target` should have the same shape,", + f" got `preds` with shape={preds.shape} and `target` with shape={target.shape}.", + ) + if preds_float and target.max() > 1: + raise ValueError( + "If `preds` and `target` are of shape (N, ...) and `preds` are floats, `target` should be binary." 
+ ) + + # Get the case + if preds.ndim == 1 and preds_float: + case = DataType.BINARY + elif preds.ndim == 1 and not preds_float: + case = DataType.MULTICLASS + elif preds.ndim > 1 and preds_float: + case = DataType.MULTILABEL + else: + case = DataType.MULTIDIM_MULTICLASS + + implied_classes = preds[0].numel() + + elif preds.ndim == target.ndim + 1: + if not preds_float: + raise ValueError("If `preds` have one dimension more than `target`, `preds` should be a float tensor.") + if preds.shape[2:] != target.shape[1:]: + raise ValueError( + "If `preds` have one dimension more than `target`, the shape of `preds` should be" + " (N, C, ...), and the shape of `target` should be (N, ...)." + ) + + implied_classes = preds.shape[1] + + if preds.ndim == 2: + case = DataType.MULTICLASS + else: + case = DataType.MULTIDIM_MULTICLASS + else: + raise ValueError( + "Either `preds` and `target` both should have the (same) shape (N, ...), or `target` should be (N, ...)" + " and `preds` should be (N, C, ...)." + ) + + return case, implied_classes + + +def _check_num_classes_binary(num_classes: int, multiclass: Optional[bool]) -> None: + """This checks that the consistency of `num_classes` with the data and `multiclass` param for binary data.""" + + if num_classes > 2: + raise ValueError("Your data is binary, but `num_classes` is larger than 2.") + if num_classes == 2 and not multiclass: + raise ValueError( + "Your data is binary and `num_classes=2`, but `multiclass` is not True." + " Set it to True if you want to transform binary data to multi-class format." + ) + if num_classes == 1 and multiclass: + raise ValueError( + "You have binary data and have set `multiclass=True`, but `num_classes` is 1." + " Either set `multiclass=None`(default) or set `num_classes=2`" + " to transform binary data to multi-class format." + ) + + +def _check_num_classes_mc( + preds: Tensor, + target: Tensor, + num_classes: int, + multiclass: Optional[bool], + implied_classes: int, +) -> None: + """This checks that the consistency of `num_classes` with the data and `multiclass` param for (multi- + dimensional) multi-class data.""" + + if num_classes == 1 and multiclass is not False: + raise ValueError( + "You have set `num_classes=1`, but predictions are integers." + " If you want to convert (multi-dimensional) multi-class data with 2 classes" + " to binary/multi-label, set `multiclass=False`." + ) + if num_classes > 1: + if multiclass is False and implied_classes != num_classes: + raise ValueError( + "You have set `multiclass=False`, but the implied number of classes " + " (from shape of inputs) does not match `num_classes`. If you are trying to" + " transform multi-dim multi-class data with 2 classes to multi-label, `num_classes`" + " should be either None or the product of the size of extra dimensions (...)." + " See Input Types in Metrics documentation." + ) + if num_classes <= target.max(): + raise ValueError("The highest label in `target` should be smaller than `num_classes`.") + if preds.shape != target.shape and num_classes != implied_classes: + raise ValueError("The size of C dimension of `preds` does not match `num_classes`.") + + +def _check_num_classes_ml(num_classes: int, multiclass: Optional[bool], implied_classes: int) -> None: + """This checks that the consistency of `num_classes` with the data and `multiclass` param for multi-label + data.""" + + if multiclass and num_classes != 2: + raise ValueError( + "Your have set `multiclass=True`, but `num_classes` is not equal to 2." 
+ " If you are trying to transform multi-label data to 2 class multi-dimensional" + " multi-class, you should set `num_classes` to either 2 or None." + ) + if not multiclass and num_classes != implied_classes: + raise ValueError("The implied number of classes (from shape of inputs) does not match num_classes.") + + +def _check_top_k(top_k: int, case: str, implied_classes: int, multiclass: Optional[bool], preds_float: bool) -> None: + if case == DataType.BINARY: + raise ValueError("You can not use `top_k` parameter with binary data.") + if not isinstance(top_k, int) or top_k <= 0: + raise ValueError("The `top_k` has to be an integer larger than 0.") + if not preds_float: + raise ValueError("You have set `top_k`, but you do not have probability predictions.") + if multiclass is False: + raise ValueError("If you set `multiclass=False`, you can not set `top_k`.") + if case == DataType.MULTILABEL and multiclass: + raise ValueError( + "If you want to transform multi-label data to 2 class multi-dimensional" + "multi-class data using `multiclass=True`, you can not use `top_k`." + ) + if top_k >= implied_classes: + raise ValueError("The `top_k` has to be strictly smaller than the `C` dimension of `preds`.") + + +def _check_classification_inputs( + preds: Tensor, + target: Tensor, + threshold: float, + num_classes: Optional[int], + multiclass: Optional[bool], + top_k: Optional[int], +) -> DataType: + """Performs error checking on inputs for classification. + + This ensures that preds and target take one of the shape/type combinations that are + specified in ``_input_format_classification`` docstring. It also checks the cases of + over-rides with ``multiclass`` by checking (for multi-class and multi-dim multi-class + cases) that there are only up to 2 distinct labels. + + In case where preds are floats (probabilities), it is checked whether they are in [0,1] interval. + + When ``num_classes`` is given, it is checked that it is consistent with input cases (binary, + multi-label, ...), and that, if available, the implied number of classes in the ``C`` + dimension is consistent with it (as well as that max label in target is smaller than it). + + When ``num_classes`` is not specified in these cases, consistency of the highest target + value against ``C`` dimension is checked for (multi-dimensional) multi-class cases. + + If ``top_k`` is set (not None) for inputs that do not have probability predictions (and + are not binary), an error is raised. Similarly if ``top_k`` is set to a number that + is higher than or equal to the ``C`` dimension of ``preds``, an error is raised. + + Preds and target tensors are expected to be squeezed already - all dimensions should be + greater than 1, except perhaps the first one (``N``). + + Args: + preds: Tensor with predictions (labels or probabilities) + target: Tensor with ground truth labels, always integers (labels) + threshold: + Threshold value for transforming probability/logit predictions to binary + (0,1) predictions, in the case of binary or multi-label inputs. + num_classes: + Number of classes. If not explicitly set, the number of classes will be inferred + either from the shape of inputs, or the maximum label in the ``target`` and ``preds`` + tensor, where applicable. + top_k: + Number of highest probability entries for each sample to convert to 1s - relevant + only for inputs with probability predictions. The default value (``None``) will be + interpreted as 1 for these inputs. 
If this parameter is set for multi-label inputs, + it will take precedence over threshold. + + Should be left unset (``None``) for inputs with label predictions. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + + Return: + case: The case the inputs fall in, one of 'binary', 'multi-class', 'multi-label' or + 'multi-dim multi-class' + """ + + # Basic validation (that does not need case/type information) + _basic_input_validation(preds, target, threshold, multiclass) + + # Check that shape/types fall into one of the cases + case, implied_classes = _check_shape_and_type_consistency(preds, target) + + # Check consistency with the `C` dimension in case of multi-class data + if preds.shape != target.shape: + if multiclass is False and implied_classes != 2: + raise ValueError( + "You have set `multiclass=False`, but have more than 2 classes in your data," + " based on the C dimension of `preds`." + ) + if target.max() >= implied_classes: + raise ValueError( + "The highest label in `target` should be smaller than the size of the `C` dimension of `preds`." + ) + + # Check that num_classes is consistent + if num_classes: + if case == DataType.BINARY: + _check_num_classes_binary(num_classes, multiclass) + elif case in (DataType.MULTICLASS, DataType.MULTIDIM_MULTICLASS): + _check_num_classes_mc(preds, target, num_classes, multiclass, implied_classes) + elif case.MULTILABEL: + _check_num_classes_ml(num_classes, multiclass, implied_classes) + + # Check that top_k is consistent + if top_k is not None: + _check_top_k(top_k, case, implied_classes, multiclass, preds.is_floating_point()) + + return case + + +def _input_squeeze( + preds: Tensor, + target: Tensor, +) -> Tuple[Tensor, Tensor]: + """Remove excess dimensions.""" + if preds.shape[0] == 1: + preds, target = preds.squeeze().unsqueeze(0), target.squeeze().unsqueeze(0) + else: + preds, target = preds.squeeze(), target.squeeze() + return preds, target + + +def _input_format_classification( + preds: Tensor, + target: Tensor, + threshold: float = 0.5, + top_k: Optional[int] = None, + num_classes: Optional[int] = None, + multiclass: Optional[bool] = None, +) -> Tuple[Tensor, Tensor, DataType]: + """Convert preds and target tensors into common format. + + Preds and targets are supposed to fall into one of these categories (and are + validated to make sure this is the case): + + * Both preds and target are of shape ``(N,)``, and both are integers (multi-class) + * Both preds and target are of shape ``(N,)``, and target is binary, while preds + are a float (binary) + * preds are of shape ``(N, C)`` and are floats, and target is of shape ``(N,)`` and + is integer (multi-class) + * preds and target are of shape ``(N, ...)``, target is binary and preds is a float + (multi-label) + * preds are of shape ``(N, C, ...)`` and are floats, target is of shape ``(N, ...)`` + and is integer (multi-dimensional multi-class) + * preds and target are of shape ``(N, ...)`` both are integers (multi-dimensional + multi-class) + + To avoid ambiguities, all dimensions of size 1, except the first one, are squeezed out. + + The returned output tensors will be binary tensors of the same shape, either ``(N, C)`` + of ``(N, C, X)``, the details for each case are described below. 
The function also returns + a ``case`` string, which describes which of the above cases the inputs belonged to - regardless + of whether this was "overridden" by other settings (like ``multiclass``). + + In binary case, targets are normally returned as ``(N,1)`` tensor, while preds are transformed + into a binary tensor (elements become 1 if the probability is greater than or equal to + ``threshold`` or 0 otherwise). If ``multiclass=True``, then then both targets are preds + become ``(N, 2)`` tensors by a one-hot transformation; with the thresholding being applied to + preds first. + + In multi-class case, normally both preds and targets become ``(N, C)`` binary tensors; targets + by a one-hot transformation and preds by selecting ``top_k`` largest entries (if their original + shape was ``(N,C)``). However, if ``multiclass=False``, then targets and preds will be + returned as ``(N,1)`` tensor. + + In multi-label case, normally targets and preds are returned as ``(N, C)`` binary tensors, with + preds being binarized as in the binary case. Here the ``C`` dimension is obtained by flattening + all dimensions after the first one. However if ``multiclass=True``, then both are returned as + ``(N, 2, C)``, by an equivalent transformation as in the binary case. + + In multi-dimensional multi-class case, normally both target and preds are returned as + ``(N, C, X)`` tensors, with ``X`` resulting from flattening of all dimensions except ``N`` and + ``C``. The transformations performed here are equivalent to the multi-class case. However, if + ``multiclass=False`` (and there are up to two classes), then the data is returned as + ``(N, X)`` binary tensors (multi-label). + + Note: + Where a one-hot transformation needs to be performed and the number of classes + is not implicitly given by a ``C`` dimension, the new ``C`` dimension will either be + equal to ``num_classes``, if it is given, or the maximum label value in preds and + target. + + Args: + preds: Tensor with predictions (labels or probabilities) + target: Tensor with ground truth labels, always integers (labels) + threshold: + Threshold value for transforming probability/logit predictions to binary + (0 or 1) predictions, in the case of binary or multi-label inputs. + num_classes: + Number of classes. If not explicitly set, the number of classes will be inferred + either from the shape of inputs, or the maximum label in the ``target`` and ``preds`` + tensor, where applicable. + top_k: + Number of highest probability entries for each sample to convert to 1s - relevant + only for (multi-dimensional) multi-class inputs with probability predictions. The + default value (``None``) will be interepreted as 1 for these inputs. + + Should be left unset (``None``) for all other types of inputs. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. 
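+
+    Example:
+        An illustrative sketch of the conversion for the plain multi-class case
+        (``B`` is the ``paddleext.torchapi`` backend imported by this module)::
+
+            preds = B.tensor([[0.1, 0.9], [0.8, 0.2]])  # (N=2, C=2) probabilities
+            target = B.tensor([1, 0])                   # (N=2,) integer labels
+            preds, target, case = _input_format_classification(preds, target)
+            # preds and target are now both (2, 2) binary int tensors,
+            # and case is DataType.MULTICLASS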
+ + Returns: + preds: binary tensor of shape ``(N, C)`` or ``(N, C, X)`` + target: binary tensor of shape ``(N, C)`` or ``(N, C, X)`` + case: The case the inputs fall in, one of ``'binary'``, ``'multi-class'``, ``'multi-label'`` or + ``'multi-dim multi-class'`` + """ + # Remove excess dimensions + preds, target = _input_squeeze(preds, target) + + # Convert half precision tensors to full precision, as not all ops are supported + # for example, min() is not supported + if preds.dtype == B.float16: + preds = preds.float() + + case = _check_classification_inputs( + preds, + target, + threshold=threshold, + num_classes=num_classes, + multiclass=multiclass, + top_k=top_k, + ) + + if case in (DataType.BINARY, DataType.MULTILABEL) and not top_k: + preds = (preds >= threshold).int() + num_classes = num_classes if not multiclass else 2 + + if case == DataType.MULTILABEL and top_k: + preds = select_topk(preds, top_k) + + if case in (DataType.MULTICLASS, DataType.MULTIDIM_MULTICLASS) or multiclass: + if preds.is_floating_point(): + num_classes = preds.shape[1] + preds = select_topk(preds, top_k or 1) + else: + num_classes = num_classes if num_classes else max(preds.max(), target.max()) + 1 + preds = to_onehot(preds, max(2, num_classes)) + + target = to_onehot(target, max(2, num_classes)) # type: ignore + + if multiclass is False: + preds, target = preds[:, 1, ...], target[:, 1, ...] + + if (case in (DataType.MULTICLASS, DataType.MULTIDIM_MULTICLASS) and multiclass is not False) or multiclass: + target = target.reshape(target.shape[0], target.shape[1], -1) + preds = preds.reshape(preds.shape[0], preds.shape[1], -1) + else: + target = target.reshape(target.shape[0], -1) + preds = preds.reshape(preds.shape[0], -1) + + # Some operations above create an extra dimension for MC/binary case - this removes it + if preds.ndim > 2: + preds, target = preds.squeeze(-1), target.squeeze(-1) + + return preds.int(), target.int(), case + + +def _input_format_classification_one_hot( + num_classes: int, + preds: Tensor, + target: Tensor, + threshold: float = 0.5, + multilabel: bool = False, +) -> Tuple[Tensor, Tensor]: + """Convert preds and target tensors into one hot spare label tensors. + + Args: + num_classes: number of classes + preds: either tensor with labels, tensor with probabilities/logits or multilabel tensor + target: tensor with ground true labels + threshold: float used for thresholding multilabel input + multilabel: boolean flag indicating if input is multilabel + + Raises: + ValueError: + If ``preds`` and ``target`` don't have the same number of dimensions + or one additional dimension for ``preds``. 
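+
+    Example:
+        An illustrative sketch for plain multi-class labels (``B`` is the backend
+        imported by this module)::
+
+            preds = B.tensor([0, 1, 2])
+            target = B.tensor([0, 2, 1])
+            preds, target = _input_format_classification_one_hot(
+                num_classes=3, preds=preds, target=target
+            )
+            # both are returned as one-hot tensors reshaped to [num_classes, -1],
+            # here of shape (3, 3)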
+ + Returns: + preds: one hot tensor of shape [num_classes, -1] with predicted labels + target: one hot tensors of shape [num_classes, -1] with true labels + """ + if preds.ndim not in (target.ndim, target.ndim + 1): + raise ValueError("preds and target must have same number of dimensions, or one additional dimension for preds") + + if preds.ndim == target.ndim + 1: + # multi class probabilities + preds = B.argmax(preds, dim=1) + + if preds.ndim == target.ndim and preds.dtype in (B.long, B.int) and num_classes > 1 and not multilabel: + # multi-class + preds = to_onehot(preds, num_classes=num_classes) + target = to_onehot(target, num_classes=num_classes) + + elif preds.ndim == target.ndim and preds.is_floating_point(): + # binary or multilabel probabilities + preds = (preds >= threshold).long() + + # transpose class as first dim and reshape + if preds.ndim > 1: + preds = preds.transpose(1, 0) + target = target.transpose(1, 0) + + return preds.reshape(num_classes, -1), target.reshape(num_classes, -1) + + +def _check_retrieval_functional_inputs( + preds: Tensor, + target: Tensor, + allow_non_binary_target: bool = False, +) -> Tuple[Tensor, Tensor]: + """Check ``preds`` and ``target`` tensors are of the same shape and of the correct dtype. + + Args: + preds: either tensor with scores/logits + target: tensor with ground true labels + allow_non_binary_target: whether to allow target to contain non-binary values + + Raises: + ValueError: + If ``preds`` and ``target`` don't have the same shape, if they are empty + or not of the correct ``dtypes``. + + Returns: + preds: as B.float32 + target: as B.long if not floating point else B.float32 + """ + if preds.shape != target.shape: + raise ValueError("`preds` and `target` must be of the same shape") + + if not preds.numel() or not preds.size(): + raise ValueError("`preds` and `target` must be non-empty and non-scalar tensors") + + return _check_retrieval_target_and_prediction_types(preds, target, allow_non_binary_target=allow_non_binary_target) + + +def _check_retrieval_inputs( + indexes: Tensor, + preds: Tensor, + target: Tensor, + allow_non_binary_target: bool = False, +) -> Tuple[Tensor, Tensor, Tensor]: + """Check ``indexes``, ``preds`` and ``target`` tensors are of the same shape and of the correct dtype. + + Args: + indexes: tensor with queries indexes + preds: tensor with scores/logits + target: tensor with ground true labels + + Raises: + ValueError: + If ``preds`` and ``target`` don't have the same shape, if they are empty + or not of the correct ``dtypes``. + + Returns: + indexes: as B.long + preds: as B.float32 + target: as B.long + """ + if indexes.shape != preds.shape or preds.shape != target.shape: + raise ValueError("`indexes`, `preds` and `target` must be of the same shape") + + if not indexes.numel() or not indexes.size(): + raise ValueError( + "`indexes`, `preds` and `target` must be non-empty and non-scalar tensors", + ) + + if indexes.dtype is not B.long: + raise ValueError("`indexes` must be a tensor of long integers") + + preds, target = _check_retrieval_target_and_prediction_types( + preds, target, allow_non_binary_target=allow_non_binary_target + ) + + return indexes.long().flatten(), preds, target + + +def _check_retrieval_target_and_prediction_types( + preds: Tensor, + target: Tensor, + allow_non_binary_target: bool = False, +) -> Tuple[Tensor, Tensor]: + """Check ``preds`` and ``target`` tensors are of the same shape and of the correct dtype. 
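+
+    For example (illustrative), an integer ``target`` containing a value larger than 1,
+    such as ``B.tensor([0, 2])``, is rejected unless ``allow_non_binary_target=True``,
+    and ``preds`` must always be a floating-point tensor.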
+ + Args: + preds: either tensor with scores/logits + target: tensor with ground true labels + allow_non_binary_target: whether to allow target to contain non-binary values + + Raises: + ValueError: + If ``preds`` and ``target`` don't have the same shape, if they are empty + or not of the correct ``dtypes``. + """ + if target.dtype not in (B.bool, B.long, B.int) and not B.is_floating_point(target): + raise ValueError("`target` must be a tensor of booleans, integers or floats") + + if not preds.is_floating_point(): + raise ValueError("`preds` must be a tensor of floats") + + if not allow_non_binary_target and (target.max() > 1 or target.min() < 0): + raise ValueError("`target` must contain `binary` values") + + target = target.float().flatten() if target.is_floating_point() else target.long().flatten() + return preds.float().flatten(), target diff --git a/RE/paddlemetric/src/paddlemetrics/utilities/data.py b/RE/paddlemetric/src/paddlemetrics/utilities/data.py new file mode 100644 index 00000000..13e43fb6 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/utilities/data.py @@ -0,0 +1,240 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, List, Mapping, Optional, Sequence, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.prints import rank_zero_warn + +METRIC_EPS = 1e-6 + + +def dim_zero_cat(x: Union[Tensor, List[Tensor]]) -> Tensor: + """concatenation along the zero dimension.""" + x = x if isinstance(x, (list, tuple)) else [x] + x = [y.unsqueeze(0) if y.numel() == 1 and y.ndim == 0 else y for y in x] + if not x: # empty list + raise ValueError("No samples to concatenate") + return B.cat(x, dim=0) + + +def dim_zero_sum(x: Tensor) -> Tensor: + """summation along the zero dimension.""" + return B.sum(x, dim=0) + + +def dim_zero_mean(x: Tensor) -> Tensor: + """average along the zero dimension.""" + return B.mean(x, dim=0) + + +def dim_zero_max(x: Tensor) -> Tensor: + """max along the zero dimension.""" + return B.max(x, dim=0).values + + +def dim_zero_min(x: Tensor) -> Tensor: + """min along the zero dimension.""" + return B.min(x, dim=0).values + + +def _flatten(x: Sequence) -> list: + return [item for sublist in x for item in sublist] + + +def to_onehot( + label_tensor: Tensor, + num_classes: Optional[int] = None, +) -> Tensor: + """Converts a dense label tensor to one-hot format. + + Args: + label_tensor: dense label tensor, with shape [N, d1, d2, ...] + num_classes: number of classes C + + Returns: + A sparse label tensor with shape [N, C, d1, d2, ...] 
+ + Example: + >>> x = B.tensor([1, 2, 3]) + >>> to_onehot(x) + tensor([[0, 1, 0, 0], + [0, 0, 1, 0], + [0, 0, 0, 1]]) + """ + if num_classes is None: + num_classes = int(label_tensor.max().detach().item() + 1) + + tensor_onehot = B.zeros( + label_tensor.shape[0], + num_classes, + *label_tensor.shape[1:], + dtype=label_tensor.dtype, + device=label_tensor.device, + ) + index = label_tensor.long().unsqueeze(1).expand_as(tensor_onehot) + return (tensor_onehot.scatter_(1, index, 1.0) > 0).to(label_tensor.dtype) + + +def select_topk(prob_tensor: Tensor, topk: int = 1, dim: int = 1) -> Tensor: + """Convert a probability tensor to binary by selecting top-k highest entries. + + Args: + prob_tensor: dense tensor of shape ``[..., C, ...]``, where ``C`` is in the + position defined by the ``dim`` argument + topk: number of highest entries to turn into 1s + dim: dimension on which to compare entries + + Returns: + A binary tensor of the same shape as the input tensor of type B.int32 + + Example: + >>> x = B.tensor([[1.1, 2.0, 3.0], [2.0, 1.0, 0.5]]) + >>> select_topk(x, topk=2) + tensor([[0, 1, 1], + [1, 1, 0]], dtype=B.int32) + """ + zeros = B.zeros_like(prob_tensor) + if topk == 1: # argmax has better performance than topk + topk_tensor = zeros.scatter(dim, prob_tensor.argmax(dim=dim, keepdim=True), 1.0) + else: + topk_tensor = zeros.scatter(dim, prob_tensor.topk(k=topk, dim=dim).indices, 1.0) + return topk_tensor.int() + + +def to_categorical(x: Tensor, argmax_dim: int = 1) -> Tensor: + """Converts a tensor of probabilities to a dense label tensor. + + Args: + x: probabilities to get the categorical label [N, d1, d2, ...] + argmax_dim: dimension to apply + + Return: + A tensor with categorical labels [N, d2, ...] + + Example: + >>> x = B.tensor([[0.2, 0.5], [0.9, 0.1]]) + >>> to_categorical(x) + tensor([1, 0]) + """ + return B.argmax(x, dim=argmax_dim) + + +def get_num_classes( + preds: Tensor, + target: Tensor, + num_classes: Optional[int] = None, +) -> int: + """Calculates the number of classes for a given prediction and target tensor. + + Args: + preds: predicted values + target: true labels + num_classes: number of classes if known + + Return: + An integer that represents the number of classes. + """ + num_target_classes = int(target.max().detach().item() + 1) + num_pred_classes = int(preds.max().detach().item() + 1) + num_all_classes = max(num_target_classes, num_pred_classes) + + if num_classes is None: + num_classes = num_all_classes + elif num_classes != num_all_classes: + rank_zero_warn( + f"You have set {num_classes} number of classes which is" + f" different from predicted ({num_pred_classes}) and" + f" target ({num_target_classes}) number of classes", + RuntimeWarning, + ) + return num_classes + + +def apply_to_collection( + data: Any, + dtype: Union[type, tuple], + function: Callable, + *args: Any, + wrong_dtype: Optional[Union[type, tuple]] = None, + **kwargs: Any, +) -> Any: + """Recursively applies a function to all elements of a certain dtype. 
+ + Args: + data: the collection to apply the function to + dtype: the given function will be applied to all elements of this dtype + function: the function to apply + *args: positional arguments (will be forwarded to calls of ``function``) + wrong_dtype: the given function won't be applied if this type is specified and the given collections is of + the :attr:`wrong_type` even if it is of type :attr`dtype` + **kwargs: keyword arguments (will be forwarded to calls of ``function``) + + Returns: + the resulting collection + + Example: + >>> apply_to_collection(B.tensor([8, 0, 2, 6, 7]), dtype=Tensor, function=lambda x: x ** 2) + tensor([64, 0, 4, 36, 49]) + >>> apply_to_collection([8, 0, 2, 6, 7], dtype=int, function=lambda x: x ** 2) + [64, 0, 4, 36, 49] + >>> apply_to_collection(dict(abc=123), dtype=int, function=lambda x: x ** 2) + {'abc': 15129} + """ + elem_type = type(data) + + # Breaking condition + if isinstance(data, dtype) and (wrong_dtype is None or not isinstance(data, wrong_dtype)): + return function(data, *args, **kwargs) + + # Recursively apply to collection items + if isinstance(data, Mapping): + return elem_type({k: apply_to_collection(v, dtype, function, *args, **kwargs) for k, v in data.items()}) + + if isinstance(data, tuple) and hasattr(data, "_fields"): # named tuple + return elem_type(*(apply_to_collection(d, dtype, function, *args, **kwargs) for d in data)) + + if isinstance(data, Sequence) and not isinstance(data, str): + return elem_type([apply_to_collection(d, dtype, function, *args, **kwargs) for d in data]) + + # data is neither of dtype, nor a collection + return data + + +def get_group_indexes(indexes: Tensor) -> List[Tensor]: + """Given an integer `B.Tensor` `indexes`, return a `B.Tensor` of indexes for each different value in + `indexes`. + + Args: + indexes: a `B.Tensor` + + Return: + A list of integer `B.Tensor`s + + Example: + >>> indexes = B.tensor([0, 0, 0, 1, 1, 1, 1]) + >>> get_group_indexes(indexes) + [tensor([0, 1, 2]), tensor([3, 4, 5, 6])] + """ + + res: dict = {} + for i, _id in enumerate(indexes): + _id = _id.item() + if _id in res: + res[_id] += [i] + else: + res[_id] = [i] + + return [tensor(x, dtype=B.long) for x in res.values()] diff --git a/RE/paddlemetric/src/paddlemetrics/utilities/distributed.py b/RE/paddlemetric/src/paddlemetrics/utilities/distributed.py new file mode 100644 index 00000000..aec42872 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/utilities/distributed.py @@ -0,0 +1,145 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, List, Optional + +import paddleext.torchapi as B +#import torchapi.nn.functional as F +from paddleext.torchapi import Tensor + + +def reduce(to_reduce: Tensor, reduction: str) -> Tensor: + """Reduces a given tensor by a given reduction method. 
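+
+    For example (illustrative), ``reduce(B.tensor([1., 2., 3.]), "elementwise_mean")``
+    returns a scalar tensor holding ``2.0``, ``reduction="sum"`` returns ``6.0``, and
+    ``reduction="none"`` returns the input unchanged.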
+ + Args: + to_reduce: the tensor, which shall be reduced + reduction: a string specifying the reduction method ('elementwise_mean', 'none', 'sum') + + Return: + reduced Tensor + + Raise: + ValueError if an invalid reduction parameter was given + """ + if reduction == "elementwise_mean": + return B.mean(to_reduce) + if reduction == "none": + return to_reduce + if reduction == "sum": + return B.sum(to_reduce) + raise ValueError("Reduction parameter unknown.") + + +def class_reduce(num: Tensor, denom: Tensor, weights: Tensor, class_reduction: str = "none") -> Tensor: + """ + Function used to reduce classification metrics of the form `num / denom * weights`. + For example for calculating standard accuracy the num would be number of + true positives per class, denom would be the support per class, and weights + would be a tensor of 1s + + Args: + num: numerator tensor + denom: denominator tensor + weights: weights for each class + class_reduction: reduction method for multiclass problems + + - ``'micro'``: calculate metrics globally (default) + - ``'macro'``: calculate metrics for each label, and find their unweighted mean. + - ``'weighted'``: calculate metrics for each label, and find their weighted mean. + - ``'none'`` or ``None``: returns calculated metric per class + + Raises: + ValueError: + If ``class_reduction`` is none of ``"micro"``, ``"macro"``, ``"weighted"``, ``"none"`` or ``None``. + + """ + valid_reduction = ("micro", "macro", "weighted", "none", None) + if class_reduction == "micro": + fraction = B.sum(num) / B.sum(denom) + else: + fraction = num / denom + + # We need to take care of instances where the denom can be 0 + # for some (or all) classes which will produce nans + fraction[fraction != fraction] = 0 + + if class_reduction == "micro": + return fraction + if class_reduction == "macro": + return B.mean(fraction) + if class_reduction == "weighted": + return B.sum(fraction * (weights.float() / B.sum(weights))) + if class_reduction == "none" or class_reduction is None: + return fraction + + raise ValueError( + f"Reduction parameter {class_reduction} unknown." f" Choose between one of these: {valid_reduction}" + ) + + +def _simple_gather_all_tensors(result: Tensor, group: Any, world_size: int) -> List[Tensor]: + gathered_result = [B.zeros_like(result) for _ in range(world_size)] + B.distributed.all_gather(gathered_result, result, group) + return gathered_result + + +def gather_all_tensors(result: Tensor, group: Optional[Any] = None) -> List[Tensor]: + """Function to gather all tensors from several ddp processes onto a list that is broadcasted to all processes. + Works on tensors that have the same number of dimensions, but where each dimension may differ. In this case + tensors are padded, gathered and then trimmed to secure equal workload for all processes. + + Args: + result: the value to sync + group: the process group to gather results from. Defaults to all processes (world) + + Return: + gathered_result: list with size equal to the process group where + gathered_result[i] corresponds to result tensor from process i + """ + if group is None: + group = B.distributed.group.WORLD + + # convert tensors to contiguous format + result = result.contiguous() + + world_size = B.distributed.get_world_size(group) + B.distributed.barrier(group=group) + + # if the tensor is scalar, things are easy + if result.ndim == 0: + return _simple_gather_all_tensors(result, group, world_size) + + # 1. 
Gather sizes of all tensors + local_size = B.tensor(result.shape, device=result.device) + local_sizes = [B.zeros_like(local_size) for _ in range(world_size)] + B.distributed.all_gather(local_sizes, local_size, group=group) + max_size = B.stack(local_sizes).max(dim=0).values + all_sizes_equal = all(all(ls == max_size) for ls in local_sizes) + + # 2. If shapes are all the same, then do a simple gather: + if all_sizes_equal: + return _simple_gather_all_tensors(result, group, world_size) + + # 3. If not, we need to pad each local tensor to maximum size, gather and then truncate + pad_dims = [] + pad_by = (max_size - local_size).detach().cpu() + for val in reversed(pad_by): + pad_dims.append(0) + pad_dims.append(val.item()) + result_padded = B.pad(result, pad_dims) + gathered_result = [B.zeros_like(result_padded) for _ in range(world_size)] + B.distributed.all_gather(gathered_result, result_padded, group) + for idx, item_size in enumerate(local_sizes): + slice_param = [slice(dim_size) for dim_size in item_size] + gathered_result[idx] = gathered_result[idx][slice_param] + return gathered_result diff --git a/RE/paddlemetric/src/paddlemetrics/utilities/enums.py b/RE/paddlemetric/src/paddlemetrics/utilities/enums.py new file mode 100644 index 00000000..7476c051 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/utilities/enums.py @@ -0,0 +1,83 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from enum import Enum +from typing import Optional, Union + + +class EnumStr(str, Enum): + """Type of any enumerator with allowed comparison to string invariant to cases. + + Example: + >>> class MyEnum(EnumStr): + ... ABC = 'abc' + >>> MyEnum.from_str('Abc') + + >>> {MyEnum.ABC: 123} + {: 123} + """ + + @classmethod + def from_str(cls, value: str) -> Optional["EnumStr"]: + statuses = [status for status in dir(cls) if not status.startswith("_")] + for st in statuses: + if st.lower() == value.lower(): + return getattr(cls, st) + return None + + def __eq__(self, other: Union[str, "EnumStr", None]) -> bool: # type: ignore + other = other.value if isinstance(other, Enum) else str(other) + return self.value.lower() == other.lower() + + def __hash__(self) -> int: + # re-enable hashtable so it can be used as a dict key or in a set + # example: set(LightningEnum) + return hash(self.name) + + +class DataType(EnumStr): + """Enum to represent data type. + + >>> "Binary" in list(DataType) + True + """ + + BINARY = "binary" + MULTILABEL = "multi-label" + MULTICLASS = "multi-class" + MULTIDIM_MULTICLASS = "multi-dim multi-class" + + +class AverageMethod(EnumStr): + """Enum to represent average method. 
+ + >>> None in list(AverageMethod) + True + >>> AverageMethod.NONE == None + True + >>> AverageMethod.NONE == 'none' + True + """ + + MICRO = "micro" + MACRO = "macro" + WEIGHTED = "weighted" + NONE = None + SAMPLES = "samples" + + +class MDMCAverageMethod(EnumStr): + """Enum to represent multi-dim multi-class average method.""" + + GLOBAL = "global" + SAMPLEWISE = "samplewise" diff --git a/RE/paddlemetric/src/paddlemetrics/utilities/exceptions.py b/RE/paddlemetric/src/paddlemetrics/utilities/exceptions.py new file mode 100644 index 00000000..767fe901 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/utilities/exceptions.py @@ -0,0 +1,17 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class paddlemetricsUserError(Exception): + """Error used to inform users of a wrong combinison of Metric API calls.""" diff --git a/RE/paddlemetric/src/paddlemetrics/utilities/imports.py b/RE/paddlemetric/src/paddlemetrics/utilities/imports.py new file mode 100644 index 00000000..f3794801 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/utilities/imports.py @@ -0,0 +1,90 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Import utilities.""" +import operator +from importlib import import_module +from importlib.util import find_spec +from typing import Callable, Optional + +from packaging.version import Version +from pkg_resources import DistributionNotFound, get_distribution + + +def _module_available(module_path: str) -> bool: + """Check if a path is available in your environment. + + >>> _module_available('os') + True + >>> _module_available('bla.bla') + False + """ + try: + return find_spec(module_path) is not None + except AttributeError: + # Python 3.6 + return False + except ModuleNotFoundError: + # Python 3.7+ + return False + + +def _compare_version(package: str, op: Callable, version: str) -> Optional[bool]: + """Compare package version with some requirements. 
+ + >>> import operator + >>> _compare_version("torch", operator.ge, "0.1") + True + >>> _compare_version("any_module", operator.ge, "0.0") # is None + """ + if not _module_available(package): + return None + try: + pkg = import_module(package) + pkg_version = pkg.__version__ # type: ignore + except (ModuleNotFoundError, DistributionNotFound): + return None + except AttributeError: + pkg_version = get_distribution(package).version + except ImportError: + # catches cyclic imports - the case with integrated libs + # see: https://stackoverflow.com/a/32965521 + pkg_version = get_distribution(package).version + try: + pkg_version = Version(pkg_version) + except TypeError: + # this is mock by sphinx, so it shall return True ro generate all summaries + return True + return op(pkg_version, Version(version)) + + +_TORCH_LOWER_1_4: Optional[bool] = False +_TORCH_LOWER_1_5: Optional[bool] = False +_TORCH_LOWER_1_6: Optional[bool] = False +_TORCH_GREATER_EQUAL_1_6: Optional[bool] = True +_TORCH_GREATER_EQUAL_1_7: Optional[bool] = True + +_LIGHTNING_AVAILABLE: bool = False +_JIWER_AVAILABLE: bool = _module_available("jiwer") +_NLTK_AVAILABLE: bool = _module_available("nltk") +_ROUGE_SCORE_AVAILABLE: bool = _module_available("rouge_score") +_BERTSCORE_AVAILABLE: bool = _module_available("bert_score") +_SCIPY_AVAILABLE: bool = _module_available("scipy") +_TORCH_FIDELITY_AVAILABLE: bool = _module_available("torch_fidelity") +_LPIPS_AVAILABLE: bool = _module_available("lpips") +_TQDM_AVAILABLE: bool = _module_available("tqdm") +_TRANSFORMERS_AVAILABLE: bool = _module_available("transformers") +_PESQ_AVAILABLE: bool = _module_available("pesq") +_SACREBLEU_AVAILABLE: bool = _module_available("sacrebleu") +_REGEX_AVAILABLE: bool = _module_available("regex") +_PYSTOI_AVAILABLE: bool = _module_available("pystoi") diff --git a/RE/paddlemetric/src/paddlemetrics/utilities/prints.py b/RE/paddlemetric/src/paddlemetrics/utilities/prints.py new file mode 100644 index 00000000..ff4b1b35 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/utilities/prints.py @@ -0,0 +1,49 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
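+# A minimal sketch of how the availability flags defined in utilities/imports.py are
+# meant to be consumed elsewhere in the package (compare the `_REGEX_AVAILABLE` check
+# guarding the "intl" tokenizer of the SacreBLEU metric): evaluate the flag at import
+# time and use it as a guard, e.g.
+#
+#     from paddlemetrics.utilities.imports import _NLTK_AVAILABLE
+#     if not _NLTK_AVAILABLE:
+#         raise ModuleNotFoundError("`nltk` is required for this feature")
+#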
+import os +import warnings +from functools import wraps +from typing import Any, Callable + +from paddlemetrics import _logger as log + + +def rank_zero_only(fn: Callable) -> Callable: + @wraps(fn) + def wrapped_fn(*args: Any, **kwargs: Any) -> Any: + if rank_zero_only.rank == 0: # type: ignore + return fn(*args, **kwargs) + + return wrapped_fn + + +# add the attribute to the function but don't overwrite in case Trainer has already set it +rank_zero_only.rank = getattr(rank_zero_only, "rank", int(os.environ.get("LOCAL_RANK", 0))) # type: ignore + + +def _warn(*args: Any, **kwargs: Any) -> None: + warnings.warn(*args, **kwargs) + + +def _info(*args: Any, **kwargs: Any) -> None: + log.info(*args, **kwargs) + + +def _debug(*args: Any, **kwargs: Any) -> None: + log.debug(*args, **kwargs) + + +rank_zero_debug = rank_zero_only(_debug) +rank_zero_info = rank_zero_only(_info) +rank_zero_warn = rank_zero_only(_warn) diff --git a/RE/paddlemetric/src/paddlemetrics/wrappers/__init__.py b/RE/paddlemetric/src/paddlemetrics/wrappers/__init__.py new file mode 100644 index 00000000..d74928f6 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/wrappers/__init__.py @@ -0,0 +1,16 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddlemetrics.wrappers.bootstrapping import BootStrapper # noqa: F401 +from paddlemetrics.wrappers.multioutput import MultioutputWrapper # noqa: F401 +from paddlemetrics.wrappers.tracker import MetricTracker # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/wrappers/bootstrapping.py b/RE/paddlemetric/src/paddlemetrics/wrappers/bootstrapping.py new file mode 100644 index 00000000..6a3e7b16 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/wrappers/bootstrapping.py @@ -0,0 +1,173 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
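+# A minimal usage sketch for the rank-zero logging helpers defined in
+# utilities/prints.py: any process may call them, but only the process whose
+# LOCAL_RANK is 0 actually emits the message, e.g.
+#
+#     from paddlemetrics.utilities import rank_zero_warn
+#     rank_zero_warn("emitted once, even when running under distributed training")
+#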
+from copy import deepcopy +from typing import Any, Callable, Dict, Optional, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, nn + +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import apply_to_collection +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_7 + + +def _bootstrap_sampler( + size: int, + sampling_strategy: str = "poisson", +) -> Tensor: + """Resample a tensor along its first dimension with replacement + Args: + size: number of samples + sampling_strategy: the strategy to use for sampling, either ``'poisson'`` or ``'multinomial'`` + generator: a instance of ``B.Generator`` that controls the sampling + + Returns: + resampled tensor + + """ + if sampling_strategy == "poisson": + p = B.distributions.Poisson(1) + n = p.sample((size,)) + return B.arange(size).repeat_interleave(n.long(), dim=0) + if sampling_strategy == "multinomial": + idx = B.multinomial(B.ones(size), num_samples=size, replacement=True) + return idx + raise ValueError("Unknown sampling strategy") + + +class BootStrapper(Metric): + def __init__( + self, + base_metric: Metric, + num_bootstraps: int = 10, + mean: bool = True, + std: bool = True, + quantile: Optional[Union[float, Tensor]] = None, + raw: bool = False, + sampling_strategy: str = "poisson", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + r""" + Using `Turn a Metric into a Bootstrapped`_ + That can automate the process of getting confidence intervals for metric values. This wrapper + class basically keeps multiple copies of the same base metric in memory and whenever ``update`` or + ``forward`` is called, all input tensors are resampled (with replacement) along the first dimension. + + Args: + base_metric: + base metric class to wrap + num_bootstraps: + number of copies to make of the base metric for bootstrapping + mean: + if ``True`` return the mean of the bootstraps + std: + if ``True`` return the standard diviation of the bootstraps + quantile: + if given, returns the quantile of the bootstraps. Can only be used with + pytorch version 1.6 or higher + raw: + if ``True``, return all bootstrapped values + sampling_strategy: + Determines how to produce bootstrapped samplings. Either ``'poisson'`` or ``multinomial``. + If ``'possion'`` is chosen, the number of times each sample will be included in the bootstrap + will be given by :math:`n\sim Poisson(\lambda=1)`, which approximates the true bootstrap distribution + when the number of samples is large. If ``'multinomial'`` is chosen, we will apply true bootstrapping + at the batch level to approximate bootstrapping over the hole dataset. + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather. 
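+
+        Note:
+            A sketch of what the resampling amounts to: with ``sampling_strategy='poisson'``
+            each sample index ``i`` is repeated :math:`n_i \sim Poisson(\lambda=1)` times (so
+            some samples are dropped and others duplicated), whereas ``'multinomial'`` draws
+            the same number of indices uniformly at random with replacement.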
+ + Example:: + >>> from pprint import pprint + >>> from paddlemetrics import Accuracy, BootStrapper + >>> _ = B.manual_seed(123) + >>> base_metric = Accuracy() + >>> bootstrap = BootStrapper(base_metric, num_bootstraps=20) + >>> bootstrap.update(B.randint(5, (20,)), B.randint(5, (20,))) + >>> output = bootstrap.compute() + >>> pprint(output) + {'mean': tensor(0.2205), 'std': tensor(0.0859)} + + """ + super().__init__(compute_on_step, dist_sync_on_step, process_group, dist_sync_fn) + if not isinstance(base_metric, Metric): + raise ValueError( + "Expected base metric to be an instance of paddlemetrics.Metric" f" but received {base_metric}" + ) + + self.metrics = nn.ModuleList([deepcopy(base_metric) for _ in range(num_bootstraps)]) + self.num_bootstraps = num_bootstraps + + self.mean = mean + self.std = std + if quantile is not None and not _TORCH_GREATER_EQUAL_1_7: + raise ValueError("quantile argument can only be used with pytorch v1.7 or higher") + self.quantile = quantile + self.raw = raw + + allowed_sampling = ("poisson", "multinomial") + if sampling_strategy not in allowed_sampling: + raise ValueError( + f"Expected argument ``sampling_strategy`` to be one of {allowed_sampling}" + f" but recieved {sampling_strategy}" + ) + self.sampling_strategy = sampling_strategy + + def update(self, *args: Any, **kwargs: Any) -> None: + """Updates the state of the base metric. + + Any tensor passed in will be bootstrapped along dimension 0 + """ + for idx in range(self.num_bootstraps): + args_sizes = apply_to_collection(args, Tensor, len) + kwargs_sizes = list(apply_to_collection(kwargs, Tensor, len)) + if len(args_sizes) > 0: + size = args_sizes[0] + elif len(kwargs_sizes) > 0: + size = kwargs_sizes[0] + else: + raise ValueError("None of the input contained tensors, so could not determine the sampling size") + sample_idx = _bootstrap_sampler(size, sampling_strategy=self.sampling_strategy).to(self.device) + new_args = apply_to_collection(args, Tensor, B.index_select, dim=0, index=sample_idx) + new_kwargs = apply_to_collection(kwargs, Tensor, B.index_select, dim=0, index=sample_idx) + self.metrics[idx].update(*new_args, **new_kwargs) + + def compute(self) -> Dict[str, Tensor]: + """Computes the bootstrapped metric values. 
+ + Allways returns a dict of tensors, which can contain the following keys: ``mean``, ``std``, ``quantile`` and + ``raw`` depending on how the class was initialized + """ + computed_vals = B.stack([m.compute() for m in self.metrics], dim=0) + output_dict = {} + if self.mean: + output_dict["mean"] = computed_vals.mean(dim=0) + if self.std: + output_dict["std"] = computed_vals.std(dim=0) + if self.quantile is not None: + output_dict["quantile"] = B.quantile(computed_vals, self.quantile) + if self.raw: + output_dict["raw"] = computed_vals + return output_dict diff --git a/RE/paddlemetric/src/paddlemetrics/wrappers/multioutput.py b/RE/paddlemetric/src/paddlemetrics/wrappers/multioutput.py new file mode 100644 index 00000000..789445be --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/wrappers/multioutput.py @@ -0,0 +1,165 @@ +from copy import deepcopy +from typing import Any, Callable, List, Optional, Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import nn + +from paddlemetrics import Metric +from paddlemetrics.utilities import apply_to_collection + + +def _get_nan_indices(*tensors: B.Tensor) -> B.Tensor: + """Get indices of rows along dim 0 which have NaN values.""" + if len(tensors) == 0: + raise ValueError("Must pass at least one tensor as argument") + sentinel = tensors[0] + nan_idxs = B.zeros(len(sentinel), dtype=B.bool, device=sentinel.device) + for tensor in tensors: + permuted_tensor = tensor.flatten(start_dim=1) + nan_idxs |= B.any(B.isnan(permuted_tensor), dim=1) + return nan_idxs + + +class MultioutputWrapper(Metric): + """Wrap a base metric to enable it to support multiple outputs. + + Several paddlemetrics metrics, such as :class:`paddlemetrics.regression.spearman.SpearmanCorrcoef` lack support for + multioutput mode. This class wraps such metrics to support computing one metric per output. + Unlike specific torchmetric metrics, it doesn't support any aggregation across outputs. + This means if you set `num_outputs` to 2, `compute()` will return a Tensor of dimension + (2, ...) where ... represents the dimensions the metric returns when not wrapped. + + In addition to enabling multioutput support for metrics that lack it, this class also supports, albeit in a crude + fashion, dealing with missing labels (or other data). When ``remove_nans`` is passed, the class will remove the + intersection of NaN containing "rows" upon each update for each output. For example, suppose a user uses + `MultioutputWrapper` to wrap :class:`paddlemetrics.regression.r2.R2Score` with 2 outputs, one of which occasionally + has missing labels for classes like ``R2Score`` is that this class supports removing NaN values + (parameter ``remove_nans``) on a per-output basis. When ``remove_nans`` is passed the wrapper will remove all rows + + Args: + base_metric: + Metric being wrapped. + num_outputs: + Expected dimensionality of the output dimension. This parameter is + used to determine the number of distinct metrics we need to track. + output_dim: + Dimension on which output is expected. Note that while this provides some flexibility, the output dimension + must be the same for all inputs to update. This applies even for metrics such as `Accuracy` where the labels + can have a different number of dimensions than the predictions. This can be worked around if the output + dimension can be set to -1 for both, even if -1 corresponds to different dimensions in different inputs. 
+ remove_nans: + Whether to remove the intersection of rows containing NaNs from the values passed through to each underlying + metric. Proper operation requires all tensors passed to update to have dimension `(N, ...)` where N + represents the length of the batch or dataset being passed in. + squeeze_outputs: + If true, will squeeze the 1-item dimensions left after `index_select` is applied. + This is sometimes unnecessary but harmless for metrics such as `R2Score` but useful + for certain classification metrics that can't handle additional 1-item dimensions. + compute_on_step: + Whether to recompute the metric value on each update step. + dist_sync_on_step: + Required for distributed training support. + process_group: + Specify the process group on which synchronization is called. + The default: None (which selects the entire world) + dist_sync_fn: + Required for distributed training support. + + Example: + + >>> # Mimic R2Score in `multioutput`, `raw_values` mode: + >>> import torchapi as B + >>> from paddlemetrics import MultioutputWrapper, R2Score + >>> target = B.tensor([[0.5, 1], [-1, 1], [7, -6]]) + >>> preds = B.tensor([[0, 2], [-1, 2], [8, -5]]) + >>> r2score = MultioutputWrapper(R2Score(), 2) + >>> r2score(preds, target) + [tensor(0.9654), tensor(0.9082)] + >>> # Classification metric where prediction and label tensors have different shapes. + >>> from paddlemetrics import BinnedAveragePrecision + >>> target = B.tensor([[1, 2], [2, 0], [1, 2]]) + >>> preds = B.tensor([ + ... [[.1, .8], [.8, .05], [.1, .15]], + ... [[.1, .1], [.2, .3], [.7, .6]], + ... [[.002, .4], [.95, .45], [.048, .15]] + ... ]) + >>> binned_avg_precision = MultioutputWrapper(BinnedAveragePrecision(3, thresholds=5), 2) + >>> binned_avg_precision(preds, target) + [[tensor(-0.), tensor(1.0000), tensor(1.0000)], [tensor(0.3333), tensor(-0.), tensor(0.6667)]] + """ + + is_differentiable = False + + def __init__( + self, + base_metric: Metric, + num_outputs: int, + output_dim: int = -1, + remove_nans: bool = True, + squeeze_outputs: bool = True, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ): + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + self.metrics = nn.ModuleList([deepcopy(base_metric) for _ in range(num_outputs)]) + self.output_dim = output_dim + self.remove_nans = remove_nans + self.squeeze_outputs = squeeze_outputs + + def _get_args_kwargs_by_output( + self, *args: B.Tensor, **kwargs: B.Tensor + ) -> List[Tuple[B.Tensor, B.Tensor]]: + """Get args and kwargs reshaped to be output-specific and (maybe) having NaNs stripped out.""" + args_kwargs_by_output = [] + for i in range(len(self.metrics)): + selected_args = apply_to_collection( + args, B.Tensor, B.index_select, dim=self.output_dim, index=B.tensor(i, device=self.device) + ) + selected_kwargs = apply_to_collection( + kwargs, B.Tensor, B.index_select, dim=self.output_dim, index=B.tensor(i, device=self.device) + ) + if self.remove_nans: + args_kwargs = selected_args + tuple(selected_kwargs.values()) + nan_idxs = _get_nan_indices(*args_kwargs) + selected_args = [arg[~nan_idxs] for arg in selected_args] + selected_kwargs = {k: v[~nan_idxs] for k, v in selected_kwargs.items()} + + if self.squeeze_outputs: + selected_args = [arg.squeeze(self.output_dim) for arg in selected_args] + args_kwargs_by_output.append((selected_args, selected_kwargs)) + return 
+
+    def update(self, *args: Any, **kwargs: Any) -> None:
+        """Update each underlying metric with the corresponding output."""
+        reshaped_args_kwargs = self._get_args_kwargs_by_output(*args, **kwargs)
+        for metric, (selected_args, selected_kwargs) in zip(self.metrics, reshaped_args_kwargs):
+            metric.update(*selected_args, **selected_kwargs)
+
+    def compute(self) -> List[B.Tensor]:
+        """Compute metrics."""
+        return [m.compute() for m in self.metrics]
+
+    def forward(self, *args: Any, **kwargs: Any) -> Any:
+        """Call underlying forward methods and aggregate the results if they're non-null.
+
+        We override this method to ensure that state variables get copied over on the underlying metrics.
+        """
+        results = []
+        reshaped_args_kwargs = self._get_args_kwargs_by_output(*args, **kwargs)
+        for metric, (selected_args, selected_kwargs) in zip(self.metrics, reshaped_args_kwargs):
+            results.append(metric(*selected_args, **selected_kwargs))
+        if results[0] is None:
+            return None
+        return results
+
+    def reset(self) -> None:
+        """Reset all underlying metrics."""
+        for metric in self.metrics:
+            metric.reset()
diff --git a/RE/paddlemetric/src/paddlemetrics/wrappers/tracker.py b/RE/paddlemetric/src/paddlemetrics/wrappers/tracker.py
new file mode 100644
index 00000000..b2b939d9
--- /dev/null
+++ b/RE/paddlemetric/src/paddlemetrics/wrappers/tracker.py
@@ -0,0 +1,127 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from copy import deepcopy
+from typing import Any, Tuple, Union
+
+import paddleext.torchapi as B
+from paddleext.torchapi import Tensor, nn
+
+from paddlemetrics.metric import Metric
+
+
+class MetricTracker(nn.ModuleList):
+    """A wrapper class that keeps track of a metric over time and implements several useful methods. The wrapper
+    implements the standard `update`, `compute` and `reset` methods, which simply call the corresponding method of
+    the currently tracked metric. However, the following additional methods are provided:
+
+    -``MetricTracker.n_steps``: number of metrics being tracked
+
+    -``MetricTracker.increment()``: initialize a new metric for being tracked
+
+    -``MetricTracker.compute_all()``: get the metric value for all steps
+
+    -``MetricTracker.best_metric()``: returns the best metric value tracked so far
+
+    Args:
+        metric: instance of a modular (class-based) paddlemetrics metric to keep track of at each timestep.
+        maximize: bool indicating whether higher metric values are better (`True`) or lower
+            values are better (`False`)
+
+    Example:
+
+        >>> from paddlemetrics import Accuracy, MetricTracker
+        >>> _ = B.manual_seed(42)
+        >>> tracker = MetricTracker(Accuracy(num_classes=10))
+        >>> for epoch in range(5):
+        ...     tracker.increment()
+        ...     for batch_idx in range(5):
+        ...         preds, target = B.randint(10, (100,)), B.randint(10, (100,))
+        ...         tracker.update(preds, target)
+        ...     print(f"current acc={tracker.compute()}")  # doctest: +NORMALIZE_WHITESPACE
+        current acc=0.1120000034570694
+        current acc=0.08799999952316284
+        current acc=0.12600000202655792
+        current acc=0.07999999821186066
+        current acc=0.10199999809265137
+        >>> best_acc, which_epoch = tracker.best_metric(return_step=True)
+        >>> tracker.compute_all()
+        tensor([0.1120, 0.0880, 0.1260, 0.0800, 0.1020])
+    """

+
+    def __init__(self, metric: Metric, maximize: bool = True) -> None:
+        super().__init__()
+        if not isinstance(metric, Metric):
+            raise TypeError("metric arg needs to be an instance of a paddlemetrics metric" f" but got {metric}")
+        self._base_metric = metric
+        self.maximize = maximize
+
+        self._increment_called = False
+
+    @property
+    def n_steps(self) -> int:
+        """Returns the number of times the tracker has been incremented."""
+        return len(self) - 1  # subtract the base metric
+
+    def increment(self) -> None:
+        """Creates a new instance of the input metric that will be updated next."""
+        self._increment_called = True
+        self.append(deepcopy(self._base_metric))
+
+    def forward(self, *args, **kwargs) -> None:  # type: ignore
+        """Calls forward of the current metric being tracked."""
+        self._check_for_increment("forward")
+        return self[-1](*args, **kwargs)
+
+    def update(self, *args, **kwargs) -> None:  # type: ignore
+        """Updates the current metric being tracked."""
+        self._check_for_increment("update")
+        self[-1].update(*args, **kwargs)
+
+    def compute(self) -> Any:
+        """Call compute of the current metric being tracked."""
+        self._check_for_increment("compute")
+        return self[-1].compute()
+
+    def compute_all(self) -> Tensor:
+        """Compute the metric value for all tracked metrics."""
+        self._check_for_increment("compute_all")
+        return B.stack([metric.compute() for i, metric in enumerate(self) if i != 0], dim=0)
+
+    def reset(self) -> None:
+        """Resets the current metric being tracked."""
+        self[-1].reset()
+
+    def reset_all(self) -> None:
+        """Resets all metrics being tracked."""
+        for metric in self:
+            metric.reset()
+
+    def best_metric(self, return_step: bool = False) -> Union[float, Tuple[int, float]]:
+        """Returns the best metric value out of all tracked (the highest if ``maximize=True``, else the lowest).
+
+        Args:
+            return_step: If `True`, also return the step at which the best metric value was observed.
+
+        Returns:
+            The best metric value, and optionally the step at which it was reached.
+        """
+        fn = B.max if self.maximize else B.min
+        idx, max = fn(self.compute_all(), 0)
+        if return_step:
+            return idx.item(), max.item()
+        return max.item()
+
+    def _check_for_increment(self, method: str) -> None:
+        if not self._increment_called:
+            raise ValueError(f"`{method}` cannot be called before `.increment()` has been called")
diff --git a/RE/paddlemetric/src/setup.py b/RE/paddlemetric/src/setup.py
new file mode 100644
index 00000000..8d7dd2d0
--- /dev/null
+++ b/RE/paddlemetric/src/setup.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+################################################################################
+#
+# Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
+#
+################################################################################
+"""
+Setup script.
+ +Authors: sunmingming01(sunmingming01@baidu.com) +Date: 2020/12/31 12:33:34 +""" + +from setuptools import setup, find_packages + +with open('README.md') as readme_file: + README = readme_file.read() + +setup_args = dict( + name='paddlemetrics', + version='1.0.0-beta', + description='Metrics library for paddle, porting from torch metrics.', + long_description_content_type="text/markdown", + long_description=README, + license='Apache', + packages=find_packages(include=["paddlemetrics", "paddlemetrics.*"]), + author='Mingming Sun', + author_email='sunmingming01@baidu.com', + keywords=['Deep Learning', 'Paddlepaddle'], + url='', + download_url='' +) + +install_requires = [ +] + +if __name__ == '__main__': + setup(**setup_args, install_requires=install_requires) \ No newline at end of file diff --git a/RE/paddlemetric/src/tests/__init__.py b/RE/paddlemetric/src/tests/__init__.py new file mode 100644 index 00000000..b56a9064 --- /dev/null +++ b/RE/paddlemetric/src/tests/__init__.py @@ -0,0 +1 @@ +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, NUM_PROCESSES, DummyMetric, MetricTester # noqa: F401 diff --git a/RE/paddlemetric/src/tests/audio/__init__.py b/RE/paddlemetric/src/tests/audio/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/audio/examples/audio_speech.wav b/RE/paddlemetric/src/tests/audio/examples/audio_speech.wav new file mode 100644 index 00000000..0fa4e9e7 Binary files /dev/null and b/RE/paddlemetric/src/tests/audio/examples/audio_speech.wav differ diff --git a/RE/paddlemetric/src/tests/audio/examples/audio_speech_bab_0dB.wav b/RE/paddlemetric/src/tests/audio/examples/audio_speech_bab_0dB.wav new file mode 100644 index 00000000..1bed1071 Binary files /dev/null and b/RE/paddlemetric/src/tests/audio/examples/audio_speech_bab_0dB.wav differ diff --git a/RE/paddlemetric/src/tests/audio/test_pesq.py b/RE/paddlemetric/src/tests/audio/test_pesq.py new file mode 100644 index 00000000..33ca39c3 --- /dev/null +++ b/RE/paddlemetric/src/tests/audio/test_pesq.py @@ -0,0 +1,138 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
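+# Checks the PESQ metric (module `PESQ` and functional `pesq`) against the reference `pesq`
+# package on random batches and on the bundled example wav files.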
+from collections import namedtuple +from functools import partial + +import pytest +import paddleext.torchapi as B +from pesq import pesq as pesq_backend +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.helpers.testers import MetricTester +from paddlemetrics.audio import PESQ +from paddlemetrics.functional import pesq +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_6 + +seed_all(42) + +Input = namedtuple("Input", ["preds", "target"]) + +# for 8k sample rate, need at least 8k/4=2000 samples +inputs_8k = Input( + preds=B.rand(2, 3, 2100), + target=B.rand(2, 3, 2100), +) +# for 16k sample rate, need at least 16k/4=4000 samples +inputs_16k = Input( + preds=B.rand(2, 3, 4100), + target=B.rand(2, 3, 4100), +) + + +def pesq_original_batch(preds: Tensor, target: Tensor, fs: int, mode: str): + # shape: preds [BATCH_SIZE, Time] , target [BATCH_SIZE, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, Time] , target [NUM_BATCHES*BATCH_SIZE, Time] + target = target.detach().cpu().numpy() + preds = preds.detach().cpu().numpy() + mss = [] + for b in range(preds.shape[0]): + pesq_val = pesq_backend(fs, target[b, ...], preds[b, ...], mode) + mss.append(pesq_val) + return B.tensor(mss) + + +def average_metric(preds, target, metric_func): + # shape: preds [BATCH_SIZE, 1, Time] , target [BATCH_SIZE, 1, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, 1, Time] , target [NUM_BATCHES*BATCH_SIZE, 1, Time] + return metric_func(preds, target).mean() + + +pesq_original_batch_8k_nb = partial(pesq_original_batch, fs=8000, mode="nb") +pesq_original_batch_16k_nb = partial(pesq_original_batch, fs=16000, mode="nb") +pesq_original_batch_16k_wb = partial(pesq_original_batch, fs=16000, mode="wb") + + +@pytest.mark.parametrize( + "preds, target, sk_metric, fs, mode", + [ + (inputs_8k.preds, inputs_8k.target, pesq_original_batch_8k_nb, 8000, "nb"), + (inputs_16k.preds, inputs_16k.target, pesq_original_batch_16k_nb, 16000, "nb"), + (inputs_16k.preds, inputs_16k.target, pesq_original_batch_16k_wb, 16000, "wb"), + ], +) +class TestPESQ(MetricTester): + atol = 1e-2 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_pesq(self, preds, target, sk_metric, fs, mode, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + PESQ, + sk_metric=partial(average_metric, metric_func=sk_metric), + dist_sync_on_step=dist_sync_on_step, + metric_args=dict(fs=fs, mode=mode), + ) + + def test_pesq_functional(self, preds, target, sk_metric, fs, mode): + self.run_functional_metric_test( + preds, + target, + pesq, + sk_metric, + metric_args=dict(fs=fs, mode=mode), + ) + + def test_pesq_differentiability(self, preds, target, sk_metric, fs, mode): + self.run_differentiability_test( + preds=preds, target=target, metric_module=PESQ, metric_functional=pesq, metric_args=dict(fs=fs, mode=mode) + ) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_6, reason="half support of core operations on not support before pytorch v1.6" + ) + def test_pesq_half_cpu(self, preds, target, sk_metric, fs, mode): + pytest.xfail("PESQ metric does not support cpu + half precision") + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_pesq_half_gpu(self, preds, target, sk_metric, fs, mode): + self.run_precision_test_gpu( + preds=preds, + target=target, + metric_module=PESQ, + metric_functional=partial(pesq, fs=fs, mode=mode), + metric_args=dict(fs=fs, mode=mode), + ) + + +def 
test_error_on_different_shape(metric_class=PESQ): + metric = metric_class(16000, "nb") + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) + + +def test_on_real_audio(): + import os + + from scipy.io import wavfile + + current_file_dir = os.path.dirname(__file__) + + rate, ref = wavfile.read(os.path.join(current_file_dir, "examples/audio_speech.wav")) + rate, deg = wavfile.read(os.path.join(current_file_dir, "examples/audio_speech_bab_0dB.wav")) + assert pesq(B.from_numpy(deg), B.from_numpy(ref), rate, "wb") == 1.0832337141036987 + assert pesq(B.from_numpy(deg), B.from_numpy(ref), rate, "nb") == 1.6072081327438354 diff --git a/RE/paddlemetric/src/tests/audio/test_pit.py b/RE/paddlemetric/src/tests/audio/test_pit.py new file mode 100644 index 00000000..54d2b5ac --- /dev/null +++ b/RE/paddlemetric/src/tests/audio/test_pit.py @@ -0,0 +1,196 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple +from functools import partial +from typing import Callable, Tuple + +import numpy as np +import pytest +import paddleext.torchapi as B +from scipy.optimize import linear_sum_assignment +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.audio import PIT +from paddlemetrics.functional import pit, si_sdr, snr +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_6 + +seed_all(42) + +TIME = 10 + +Input = namedtuple("Input", ["preds", "target"]) + +# three speaker examples to test _find_best_perm_by_linear_sum_assignment +inputs1 = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, 3, TIME), + target=B.rand(NUM_BATCHES, BATCH_SIZE, 3, TIME), +) +# two speaker examples to test _find_best_perm_by_exhuastive_method +inputs2 = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, 2, TIME), + target=B.rand(NUM_BATCHES, BATCH_SIZE, 2, TIME), +) + + +def naive_implementation_pit_scipy( + preds: Tensor, + target: Tensor, + metric_func: Callable, + eval_func: str, +) -> Tuple[Tensor, Tensor]: + """A naive implementation of `Permutation Invariant Training` based on Scipy. 
+ + Args: + preds: predictions, shape[batch, spk, time] + target: targets, shape[batch, spk, time] + metric_func: which metric + eval_func: min or max + + Returns: + best_metric: + shape [batch] + best_perm: + shape [batch, spk] + """ + batch_size, spk_num = target.shape[0:2] + metric_mtx = B.empty((batch_size, spk_num, spk_num), device=target.device) + for t in range(spk_num): + for e in range(spk_num): + metric_mtx[:, t, e] = metric_func(preds[:, e, ...], target[:, t, ...]) + + # pit_r = PIT(metric_func, eval_func)(preds, target) + metric_mtx = metric_mtx.detach().cpu().numpy() + best_metrics = [] + best_perms = [] + for b in range(batch_size): + row_idx, col_idx = linear_sum_assignment(metric_mtx[b, ...], eval_func == "max") + best_metrics.append(metric_mtx[b, row_idx, col_idx].mean()) + best_perms.append(col_idx) + return B.from_numpy(np.stack(best_metrics)), B.from_numpy(np.stack(best_perms)) + + +def _average_metric(preds: Tensor, target: Tensor, metric_func: Callable) -> Tensor: + """average the metric values. + + Args: + preds: predictions, shape[batch, spk, time] + target: targets, shape[batch, spk, time] + metric_func: a function which return best_metric and best_perm + + Returns: + the average of best_metric + """ + return metric_func(preds, target)[0].mean() + + +snr_pit_scipy = partial(naive_implementation_pit_scipy, metric_func=snr, eval_func="max") +si_sdr_pit_scipy = partial(naive_implementation_pit_scipy, metric_func=si_sdr, eval_func="max") + + +@pytest.mark.parametrize( + "preds, target, sk_metric, metric_func, eval_func", + [ + (inputs1.preds, inputs1.target, snr_pit_scipy, snr, "max"), + (inputs1.preds, inputs1.target, si_sdr_pit_scipy, si_sdr, "max"), + (inputs2.preds, inputs2.target, snr_pit_scipy, snr, "max"), + (inputs2.preds, inputs2.target, si_sdr_pit_scipy, si_sdr, "max"), + ], +) +class TestPIT(MetricTester): + atol = 1e-2 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_pit(self, preds, target, sk_metric, metric_func, eval_func, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + PIT, + sk_metric=partial(_average_metric, metric_func=sk_metric), + dist_sync_on_step=dist_sync_on_step, + metric_args=dict(metric_func=metric_func, eval_func=eval_func), + ) + + def test_pit_functional(self, preds, target, sk_metric, metric_func, eval_func): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=pit, + sk_metric=sk_metric, + metric_args=dict(metric_func=metric_func, eval_func=eval_func), + ) + + def test_pit_differentiability(self, preds, target, sk_metric, metric_func, eval_func): + def pit_diff(preds, target, metric_func, eval_func): + return pit(preds, target, metric_func, eval_func)[0] + + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=PIT, + metric_functional=pit_diff, + metric_args={"metric_func": metric_func, "eval_func": eval_func}, + ) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_6, reason="half support of core operations on not support before pytorch v1.6" + ) + def test_pit_half_cpu(self, preds, target, sk_metric, metric_func, eval_func): + pytest.xfail("PIT metric does not support cpu + half precision") + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_pit_half_gpu(self, preds, target, sk_metric, metric_func, eval_func): + self.run_precision_test_gpu( + preds=preds, + target=target, + metric_module=PIT, + metric_functional=partial(pit, 
metric_func=metric_func, eval_func=eval_func), + metric_args={"metric_func": metric_func, "eval_func": eval_func}, + ) + + +def test_error_on_different_shape() -> None: + metric = PIT(snr, "max") + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(3, 3, 10), B.randn(3, 2, 10)) + + +def test_error_on_wrong_eval_func() -> None: + metric = PIT(snr, "xxx") + with pytest.raises(ValueError, match='eval_func can only be "max" or "min"'): + metric(B.randn(3, 3, 10), B.randn(3, 3, 10)) + + +def test_error_on_wrong_shape() -> None: + metric = PIT(snr, "max") + with pytest.raises(ValueError, match="Inputs must be of shape *"): + metric(B.randn(3), B.randn(3)) + + +def test_consistency_of_two_implementations() -> None: + from paddlemetrics.functional.audio.pit import ( + _find_best_perm_by_exhuastive_method, + _find_best_perm_by_linear_sum_assignment, + ) + + shapes_test = [(5, 2, 2), (4, 3, 3), (4, 4, 4), (3, 5, 5)] + for shp in shapes_test: + metric_mtx = B.randn(size=shp) + bm1, bp1 = _find_best_perm_by_linear_sum_assignment(metric_mtx, B.max) + bm2, bp2 = _find_best_perm_by_exhuastive_method(metric_mtx, B.max) + assert B.allclose(bm1, bm2) + assert (bp1 == bp2).all() diff --git a/RE/paddlemetric/src/tests/audio/test_si_sdr.py b/RE/paddlemetric/src/tests/audio/test_si_sdr.py new file mode 100644 index 00000000..f7647b49 --- /dev/null +++ b/RE/paddlemetric/src/tests/audio/test_si_sdr.py @@ -0,0 +1,131 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
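+# Checks the SI_SDR metric (module `SI_SDR` and functional `si_sdr`) against the `speechmetrics`
+# "sisdr" reference, with and without zero-mean normalization.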
+from collections import namedtuple +from functools import partial + +import pytest +import speechmetrics +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.audio import SI_SDR +from paddlemetrics.functional import si_sdr +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_6 + +seed_all(42) + +Time = 100 + +Input = namedtuple("Input", ["preds", "target"]) + +inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, 1, Time), + target=B.rand(NUM_BATCHES, BATCH_SIZE, 1, Time), +) + +speechmetrics_sisdr = speechmetrics.load("sisdr") + + +def speechmetrics_si_sdr(preds: Tensor, target: Tensor, zero_mean: bool): + # shape: preds [BATCH_SIZE, 1, Time] , target [BATCH_SIZE, 1, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, 1, Time] , target [NUM_BATCHES*BATCH_SIZE, 1, Time] + if zero_mean: + preds = preds - preds.mean(dim=2, keepdim=True) + target = target - target.mean(dim=2, keepdim=True) + target = target.detach().cpu().numpy() + preds = preds.detach().cpu().numpy() + mss = [] + for i in range(preds.shape[0]): + ms = [] + for j in range(preds.shape[1]): + metric = speechmetrics_sisdr(preds[i, j], target[i, j], rate=16000) + ms.append(metric["sisdr"][0]) + mss.append(ms) + return B.tensor(mss) + + +def average_metric(preds, target, metric_func): + # shape: preds [BATCH_SIZE, 1, Time] , target [BATCH_SIZE, 1, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, 1, Time] , target [NUM_BATCHES*BATCH_SIZE, 1, Time] + return metric_func(preds, target).mean() + + +speechmetrics_si_sdr_zero_mean = partial(speechmetrics_si_sdr, zero_mean=True) +speechmetrics_si_sdr_no_zero_mean = partial(speechmetrics_si_sdr, zero_mean=False) + + +@pytest.mark.parametrize( + "preds, target, sk_metric, zero_mean", + [ + (inputs.preds, inputs.target, speechmetrics_si_sdr_zero_mean, True), + (inputs.preds, inputs.target, speechmetrics_si_sdr_no_zero_mean, False), + ], +) +class TestSISDR(MetricTester): + atol = 1e-2 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_si_sdr(self, preds, target, sk_metric, zero_mean, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + SI_SDR, + sk_metric=partial(average_metric, metric_func=sk_metric), + dist_sync_on_step=dist_sync_on_step, + metric_args=dict(zero_mean=zero_mean), + ) + + def test_si_sdr_functional(self, preds, target, sk_metric, zero_mean): + self.run_functional_metric_test( + preds, + target, + si_sdr, + sk_metric, + metric_args=dict(zero_mean=zero_mean), + ) + + def test_si_sdr_differentiability(self, preds, target, sk_metric, zero_mean): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=SI_SDR, + metric_functional=si_sdr, + metric_args={"zero_mean": zero_mean}, + ) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_6, reason="half support of core operations on not support before pytorch v1.6" + ) + def test_si_sdr_half_cpu(self, preds, target, sk_metric, zero_mean): + pytest.xfail("SI-SDR metric does not support cpu + half precision") + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_si_sdr_half_gpu(self, preds, target, sk_metric, zero_mean): + self.run_precision_test_gpu( + preds=preds, + target=target, + metric_module=SI_SDR, + metric_functional=si_sdr, + metric_args={"zero_mean": zero_mean}, + ) + + +def 
test_error_on_different_shape(metric_class=SI_SDR): + metric = metric_class() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) diff --git a/RE/paddlemetric/src/tests/audio/test_si_snr.py b/RE/paddlemetric/src/tests/audio/test_si_snr.py new file mode 100644 index 00000000..96745117 --- /dev/null +++ b/RE/paddlemetric/src/tests/audio/test_si_snr.py @@ -0,0 +1,112 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple +from functools import partial + +import pytest +import speechmetrics +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.audio import SI_SNR +from paddlemetrics.functional import si_snr +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_6 + +seed_all(42) + +Time = 100 + +Input = namedtuple("Input", ["preds", "target"]) + +inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, 1, Time), + target=B.rand(NUM_BATCHES, BATCH_SIZE, 1, Time), +) + +speechmetrics_sisdr = speechmetrics.load("sisdr") + + +def speechmetrics_si_sdr(preds: Tensor, target: Tensor, zero_mean: bool = True): + # shape: preds [BATCH_SIZE, 1, Time] , target [BATCH_SIZE, 1, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, 1, Time] , target [NUM_BATCHES*BATCH_SIZE, 1, Time] + if zero_mean: + preds = preds - preds.mean(dim=2, keepdim=True) + target = target - target.mean(dim=2, keepdim=True) + target = target.detach().cpu().numpy() + preds = preds.detach().cpu().numpy() + mss = [] + for i in range(preds.shape[0]): + ms = [] + for j in range(preds.shape[1]): + metric = speechmetrics_sisdr(preds[i, j], target[i, j], rate=16000) + ms.append(metric["sisdr"][0]) + mss.append(ms) + return B.tensor(mss) + + +def average_metric(preds, target, metric_func): + # shape: preds [BATCH_SIZE, 1, Time] , target [BATCH_SIZE, 1, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, 1, Time] , target [NUM_BATCHES*BATCH_SIZE, 1, Time] + return metric_func(preds, target).mean() + + +@pytest.mark.parametrize( + "preds, target, sk_metric", + [ + (inputs.preds, inputs.target, speechmetrics_si_sdr), + ], +) +class TestSISNR(MetricTester): + atol = 1e-2 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_si_snr(self, preds, target, sk_metric, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + SI_SNR, + sk_metric=partial(average_metric, metric_func=sk_metric), + dist_sync_on_step=dist_sync_on_step, + ) + + def test_si_snr_functional(self, preds, target, sk_metric): + self.run_functional_metric_test( + preds, + target, + si_snr, + sk_metric, + ) + + def test_si_snr_differentiability(self, preds, target, sk_metric): + self.run_differentiability_test(preds=preds, target=target, metric_module=SI_SNR, 
metric_functional=si_snr) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_6, reason="half support of core operations on not support before pytorch v1.6" + ) + def test_si_snr_half_cpu(self, preds, target, sk_metric): + pytest.xfail("SI-SNR metric does not support cpu + half precision") + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_si_snr_half_gpu(self, preds, target, sk_metric): + self.run_precision_test_gpu(preds=preds, target=target, metric_module=SI_SNR, metric_functional=si_snr) + + +def test_error_on_different_shape(metric_class=SI_SNR): + metric = metric_class() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) diff --git a/RE/paddlemetric/src/tests/audio/test_snr.py b/RE/paddlemetric/src/tests/audio/test_snr.py new file mode 100644 index 00000000..86d28837 --- /dev/null +++ b/RE/paddlemetric/src/tests/audio/test_snr.py @@ -0,0 +1,125 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple +from functools import partial +from typing import Callable + +import pytest +import paddleext.torchapi as B +from mir_eval.separation import bss_eval_images as mir_eval_bss_eval_images +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.audio import SNR +from paddlemetrics.functional import snr +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_6 + +seed_all(42) + +Time = 100 + +Input = namedtuple("Input", ["preds", "target"]) + +inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, 1, Time), + target=B.rand(NUM_BATCHES, BATCH_SIZE, 1, Time), +) + + +def bss_eval_images_snr(preds: Tensor, target: Tensor, metric_func: Callable, zero_mean: bool): + # shape: preds [BATCH_SIZE, 1, Time] , target [BATCH_SIZE, 1, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, 1, Time] , target [NUM_BATCHES*BATCH_SIZE, 1, Time] + if zero_mean: + target = target - B.mean(target, dim=-1, keepdim=True) + preds = preds - B.mean(preds, dim=-1, keepdim=True) + target = target.detach().cpu().numpy() + preds = preds.detach().cpu().numpy() + mss = [] + for i in range(preds.shape[0]): + ms = [] + for j in range(preds.shape[1]): + if metric_func == mir_eval_bss_eval_images: + snr_v = metric_func([target[i, j]], [preds[i, j]])[0][0] + else: + snr_v = metric_func([target[i, j]], [preds[i, j]])[0][0][0] + ms.append(snr_v) + mss.append(ms) + return B.tensor(mss) + + +def average_metric(preds: Tensor, target: Tensor, metric_func: Callable): + # shape: preds [BATCH_SIZE, 1, Time] , target [BATCH_SIZE, 1, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, 1, Time] , target [NUM_BATCHES*BATCH_SIZE, 1, Time] + return metric_func(preds, target).mean() + + +mireval_snr_zeromean = partial(bss_eval_images_snr, metric_func=mir_eval_bss_eval_images, zero_mean=True) +mireval_snr_nozeromean = 
partial(bss_eval_images_snr, metric_func=mir_eval_bss_eval_images, zero_mean=False) + + +@pytest.mark.parametrize( + "preds, target, sk_metric, zero_mean", + [ + (inputs.preds, inputs.target, mireval_snr_zeromean, True), + (inputs.preds, inputs.target, mireval_snr_nozeromean, False), + ], +) +class TestSNR(MetricTester): + atol = 1e-2 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_snr(self, preds, target, sk_metric, zero_mean, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + SNR, + sk_metric=partial(average_metric, metric_func=sk_metric), + dist_sync_on_step=dist_sync_on_step, + metric_args=dict(zero_mean=zero_mean), + ) + + def test_snr_functional(self, preds, target, sk_metric, zero_mean): + self.run_functional_metric_test( + preds, + target, + snr, + sk_metric, + metric_args=dict(zero_mean=zero_mean), + ) + + def test_snr_differentiability(self, preds, target, sk_metric, zero_mean): + self.run_differentiability_test( + preds=preds, target=target, metric_module=SNR, metric_functional=snr, metric_args={"zero_mean": zero_mean} + ) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_6, reason="half support of core operations on not support before pytorch v1.6" + ) + def test_snr_half_cpu(self, preds, target, sk_metric, zero_mean): + pytest.xfail("SNR metric does not support cpu + half precision") + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_snr_half_gpu(self, preds, target, sk_metric, zero_mean): + self.run_precision_test_gpu( + preds=preds, target=target, metric_module=SNR, metric_functional=snr, metric_args={"zero_mean": zero_mean} + ) + + +def test_error_on_different_shape(metric_class=SNR): + metric = metric_class() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) diff --git a/RE/paddlemetric/src/tests/audio/test_stoi.py b/RE/paddlemetric/src/tests/audio/test_stoi.py new file mode 100644 index 00000000..70c7208b --- /dev/null +++ b/RE/paddlemetric/src/tests/audio/test_stoi.py @@ -0,0 +1,146 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
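+# Checks the STOI metric (module `STOI` and functional `stoi`) against the `pystoi` reference
+# at 8 kHz and 16 kHz, in extended and non-extended mode.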
+from collections import namedtuple +from functools import partial + +import pytest +import paddleext.torchapi as B +from pystoi import stoi as stoi_backend +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.helpers.testers import MetricTester +from paddlemetrics.audio import STOI +from paddlemetrics.functional import stoi +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_6 + +seed_all(42) + +Input = namedtuple("Input", ["preds", "target"]) + +inputs_8k = Input( + preds=B.rand(2, 3, 8000), + target=B.rand(2, 3, 8000), +) +inputs_16k = Input( + preds=B.rand(2, 3, 16000), + target=B.rand(2, 3, 16000), +) + + +def stoi_original_batch(preds: Tensor, target: Tensor, fs: int, extended: bool): + # shape: preds [BATCH_SIZE, Time] , target [BATCH_SIZE, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, Time] , target [NUM_BATCHES*BATCH_SIZE, Time] + target = target.detach().cpu().numpy() + preds = preds.detach().cpu().numpy() + mss = [] + for b in range(preds.shape[0]): + pesq_val = stoi_backend(target[b, ...], preds[b, ...], fs, extended) + mss.append(pesq_val) + return B.tensor(mss) + + +def average_metric(preds, target, metric_func): + # shape: preds [BATCH_SIZE, 1, Time] , target [BATCH_SIZE, 1, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, 1, Time] , target [NUM_BATCHES*BATCH_SIZE, 1, Time] + return metric_func(preds, target).mean() + + +stoi_original_batch_8k_ext = partial(stoi_original_batch, fs=8000, extended=True) +stoi_original_batch_16k_ext = partial(stoi_original_batch, fs=16000, extended=True) +stoi_original_batch_8k_noext = partial(stoi_original_batch, fs=8000, extended=False) +stoi_original_batch_16k_noext = partial(stoi_original_batch, fs=16000, extended=False) + + +@pytest.mark.parametrize( + "preds, target, sk_metric, fs, extended", + [ + (inputs_8k.preds, inputs_8k.target, stoi_original_batch_8k_ext, 8000, True), + (inputs_16k.preds, inputs_16k.target, stoi_original_batch_16k_ext, 16000, True), + (inputs_8k.preds, inputs_8k.target, stoi_original_batch_8k_noext, 8000, False), + (inputs_16k.preds, inputs_16k.target, stoi_original_batch_16k_noext, 16000, False), + ], +) +class TestSTOI(MetricTester): + atol = 1e-2 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_stoi(self, preds, target, sk_metric, fs, extended, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + STOI, + sk_metric=partial(average_metric, metric_func=sk_metric), + dist_sync_on_step=dist_sync_on_step, + metric_args=dict(fs=fs, extended=extended), + ) + + def test_stoi_functional(self, preds, target, sk_metric, fs, extended): + self.run_functional_metric_test( + preds, + target, + stoi, + sk_metric, + metric_args=dict(fs=fs, extended=extended), + ) + + def test_stoi_differentiability(self, preds, target, sk_metric, fs, extended): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=STOI, + metric_functional=stoi, + metric_args=dict(fs=fs, extended=extended), + ) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_6, reason="half support of core operations on not support before pytorch v1.6" + ) + def test_stoi_half_cpu(self, preds, target, sk_metric, fs, extended): + pytest.xfail("STOI metric does not support cpu + half precision") + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_stoi_half_gpu(self, preds, target, sk_metric, fs, extended): + self.run_precision_test_gpu( + 
preds=preds, + target=target, + metric_module=STOI, + metric_functional=partial(stoi, fs=fs, extended=extended), + metric_args=dict(fs=fs, extended=extended), + ) + + +def test_error_on_different_shape(metric_class=STOI): + metric = metric_class(16000) + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) + + +def test_on_real_audio(): + import os + + from scipy.io import wavfile + + current_file_dir = os.path.dirname(__file__) + + rate, ref = wavfile.read(os.path.join(current_file_dir, "examples/audio_speech.wav")) + rate, deg = wavfile.read(os.path.join(current_file_dir, "examples/audio_speech_bab_0dB.wav")) + assert B.allclose( + stoi(B.from_numpy(deg), B.from_numpy(ref), rate).float(), + B.tensor(0.6739177), + rtol=0.0001, + atol=1e-4, + ) diff --git a/RE/paddlemetric/src/tests/bases/__init__.py b/RE/paddlemetric/src/tests/bases/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/bases/test.log b/RE/paddlemetric/src/tests/bases/test.log new file mode 100644 index 00000000..eb8e22c4 --- /dev/null +++ b/RE/paddlemetric/src/tests/bases/test.log @@ -0,0 +1,2764 @@ +============================= test session starts ============================== +platform darwin -- Python 3.8.12, pytest-7.1.2, pluggy-1.0.0 +rootdir: /Users/sun/Projects/oix/baidu/ccl/paddlemetric/src/tests/bases +plugins: hydra-core-1.1.0.dev5 +collected 86 items + +test_composition.py .....FFF.....FF.................FFFFFFFFFFFFFF.F....FFFFF.FFFFFFFFFFFFFFFFFF.F....FF.. + +=================================== FAILURES =================================== +_____________________ test_metrics_and[3-expected_result1] _____________________ + +second_operand = 3 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric(3), tensor(2)), (3, tensor(2)), (3, tensor(2)), (tensor(3), tensor(2))], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_and(second_operand, expected_result): + first_metric = DummyMetric(2) + + final_and = first_metric & second_operand + final_rand = second_operand & first_metric + + assert isinstance(final_and, CompositionalMetric) + assert isinstance(final_rand, CompositionalMetric) + + final_and.update() + final_rand.update() +> assert B.allclose(expected_result, final_and.compute()) + +test_composition.py:83: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) +../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/logic.py:559: in bitwise_and + return _bitwise_op( +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +op_name = 'bitwise_and' +x = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +y = 3, out = None, name = None, binary_op = True + + def _bitwise_op(op_name, x, y, out=None, name=None, binary_op=True): + if paddle.in_dynamic_mode(): + op = getattr(_C_ops, op_name) + if binary_op: +> return op(x, y) +E ValueError: (InvalidArgument) bitwise_and(): argument 'Y' (position 1) must be Tensor, but got int (at 
/Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:737) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/logic.py:505: ValueError +_____________________ test_metrics_and[3-expected_result2] _____________________ + +second_operand = 3 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric(3), tensor(2)), (3, tensor(2)), (3, tensor(2)), (tensor(3), tensor(2))], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_and(second_operand, expected_result): + first_metric = DummyMetric(2) + + final_and = first_metric & second_operand + final_rand = second_operand & first_metric + + assert isinstance(final_and, CompositionalMetric) + assert isinstance(final_rand, CompositionalMetric) + + final_and.update() + final_rand.update() +> assert B.allclose(expected_result, final_and.compute()) + +test_composition.py:83: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) +../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/logic.py:559: in bitwise_and + return _bitwise_op( +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +op_name = 'bitwise_and' +x = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +y = 3, out = None, name = None, binary_op = True + + def _bitwise_op(op_name, x, y, out=None, name=None, binary_op=True): + if paddle.in_dynamic_mode(): + op = getattr(_C_ops, op_name) + if binary_op: +> return op(x, y) +E ValueError: (InvalidArgument) bitwise_and(): argument 'Y' (position 1) must be Tensor, but got int (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:737) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/logic.py:505: ValueError +______________ test_metrics_and[second_operand3-expected_result3] ______________ + +second_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 3) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric(3), tensor(2)), (3, tensor(2)), (3, tensor(2)), (tensor(3), tensor(2))], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_and(second_operand, expected_result): + first_metric = DummyMetric(2) + + final_and = first_metric & second_operand +> final_rand = second_operand & first_metric + +test_composition.py:76: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../../../../../torch2paddle/paddleext/torchapi/tensor_.py:361: in __and__ + return paddle.logical_or(self.bool(), other.bool()) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = DummyMetric(), name = 'bool' + + def __getattr__(self, name): + if '_parameters' in self.__dict__: + _parameters = self.__dict__['_parameters'] + if name in self._parameters: + if in_declarative_mode(): + return _convert_into_variable(self._parameters[name]) + return self._parameters[name] + if '_sub_layers' in self.__dict__: + _sub_layers = self.__dict__['_sub_layers'] + if 
name in self._sub_layers: + return self._sub_layers[name] + if '_buffers' in self.__dict__: + _buffers = self.__dict__['_buffers'] + if name in _buffers: + if in_declarative_mode(): + return _convert_into_variable(_buffers[name]) + return _buffers[name] +> return object.__getattribute__(self, name) +E AttributeError: 'DummyMetric' object has no attribute 'bool' + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/layers.py:1123: AttributeError +__________________ test_metrics_floordiv[2-expected_result1] ___________________ + +second_operand = 2 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(2)), + (2, tensor(2)), + (2.0, tensor(2.0)), + (tensor(2), tensor(2)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_floordiv(second_operand, expected_result): + first_metric = DummyMetric(5) + + final_floordiv = first_metric // second_operand + + assert isinstance(final_floordiv, CompositionalMetric) + + final_floordiv.update() +> assert B.allclose(expected_result, final_floordiv.compute()) + +test_composition.py:126: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) +../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:460: in floor_divide + return _elementwise_op_in_dygraph( +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/decorator.py:232: in fun + return caller(func, *(extras + args), **kw) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/wrapped_decorator.py:25: in __impl__ + return wrapped_func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/framework.py:434: in __impl__ + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 5) +y = 2, axis = -1, act = None, use_mkldnn = False +op_name = 'elementwise_floordiv' + + @dygraph_only + def _elementwise_op_in_dygraph(x, + y, + axis=-1, + act=None, + use_mkldnn=False, + op_name=None): + def is_inplace(op_name): + return op_name[-1] == "_" + + if op_name not in OP_NAMEMAPPING.keys(): + op = getattr(_C_ops, op_name) + out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) + else: + if in_dygraph_mode(): + op = getattr(_C_ops, OP_NAMEMAPPING[op_name] if not is_inplace(op_name) else op_name) + out = op(x, y) + + if _in_legacy_dygraph(): + op = getattr(_C_ops, op_name) +> out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) +E ValueError: (InvalidArgument) elementwise_floordiv(): argument 'Y' (position 1) must be Tensor, but got int (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:737) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:218: ValueError +_________________ test_metrics_floordiv[2.0-expected_result2] __________________ + +second_operand = 2.0 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) 
+ + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(2)), + (2, tensor(2)), + (2.0, tensor(2.0)), + (tensor(2), tensor(2)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_floordiv(second_operand, expected_result): + first_metric = DummyMetric(5) + + final_floordiv = first_metric // second_operand + + assert isinstance(final_floordiv, CompositionalMetric) + + final_floordiv.update() +> assert B.allclose(expected_result, final_floordiv.compute()) + +test_composition.py:126: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) +../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:460: in floor_divide + return _elementwise_op_in_dygraph( +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/decorator.py:232: in fun + return caller(func, *(extras + args), **kw) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/wrapped_decorator.py:25: in __impl__ + return wrapped_func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/framework.py:434: in __impl__ + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 5) +y = 2.0, axis = -1, act = None, use_mkldnn = False +op_name = 'elementwise_floordiv' + + @dygraph_only + def _elementwise_op_in_dygraph(x, + y, + axis=-1, + act=None, + use_mkldnn=False, + op_name=None): + def is_inplace(op_name): + return op_name[-1] == "_" + + if op_name not in OP_NAMEMAPPING.keys(): + op = getattr(_C_ops, op_name) + out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) + else: + if in_dygraph_mode(): + op = getattr(_C_ops, OP_NAMEMAPPING[op_name] if not is_inplace(op_name) else op_name) + out = op(x, y) + + if _in_legacy_dygraph(): + op = getattr(_C_ops, op_name) +> out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) +E ValueError: (InvalidArgument) elementwise_floordiv(): argument 'Y' (position 1) must be Tensor, but got float (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:737) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:218: ValueError +____________ test_metrics_matmul[second_operand0-expected_result0] _____________ + +second_operand = DummyMetric() +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 12) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric([2, 2, 2]), tensor(12)), (tensor([2, 2, 2]), tensor(12))], + ) + def test_metrics_matmul(second_operand, expected_result): + first_metric = DummyMetric([2, 2, 2]) + + final_matmul = first_metric @ second_operand + + assert isinstance(final_matmul, CompositionalMetric) + + final_matmul.update() +> assert B.allclose(expected_result, final_matmul.compute()) + +test_composition.py:225: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) 
+../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [2, 2, 2]) +y = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [2, 2, 2]) +transpose_x = False, transpose_y = False, name = None + + def matmul(x, y, transpose_x=False, transpose_y=False, name=None): + """ + Applies matrix multiplication to two tensors. `matmul` follows + the complete broadcast rules, + and its behavior is consistent with `np.matmul`. + + Currently, the input tensors' number of dimensions can be any, `matmul` can be used to + achieve the `dot`, `matmul` and `batchmatmul`. + + The actual behavior depends on the shapes of :math:`x`, :math:`y` and the + flag values of :attr:`transpose_x`, :attr:`transpose_y`. Specifically: + + - If a transpose flag is specified, the last two dimensions of the tensor + are transposed. If the tensor is ndim-1 of shape, the transpose is invalid. If the tensor + is ndim-1 of shape :math:`[D]`, then for :math:`x` it is treated as :math:`[1, D]`, whereas + for :math:`y` it is the opposite: It is treated as :math:`[D, 1]`. + + The multiplication behavior depends on the dimensions of `x` and `y`. Specifically: + + - If both tensors are 1-dimensional, the dot product result is obtained. + + - If both tensors are 2-dimensional, the matrix-matrix product is obtained. + + - If the `x` is 1-dimensional and the `y` is 2-dimensional, + a `1` is prepended to its dimension in order to conduct the matrix multiply. + After the matrix multiply, the prepended dimension is removed. + + - If the `x` is 2-dimensional and `y` is 1-dimensional, + the matrix-vector product is obtained. + + - If both arguments are at least 1-dimensional and at least one argument + is N-dimensional (where N > 2), then a batched matrix multiply is obtained. + If the first argument is 1-dimensional, a 1 is prepended to its dimension + in order to conduct the batched matrix multiply and removed after. + If the second argument is 1-dimensional, a 1 is appended to its + dimension for the purpose of the batched matrix multiple and removed after. + The non-matrix (exclude the last two dimensions) dimensions are + broadcasted according the broadcast rule. + For example, if input is a (j, 1, n, m) tensor and the other is a (k, m, p) tensor, + out will be a (j, k, n, p) tensor. + + Args: + x (Tensor): The input tensor which is a Tensor. + y (Tensor): The input tensor which is a Tensor. + transpose_x (bool): Whether to transpose :math:`x` before multiplication. + transpose_y (bool): Whether to transpose :math:`y` before multiplication. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + Tensor: The output Tensor. + + Examples: + + .. 
code-block:: python + + import paddle + import numpy as np + + # vector * vector + x_data = np.random.random([10]).astype(np.float32) + y_data = np.random.random([10]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [1] + + # matrix * vector + x_data = np.random.random([10, 5]).astype(np.float32) + y_data = np.random.random([5]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10] + + # batched matrix * broadcasted vector + x_data = np.random.random([10, 5, 2]).astype(np.float32) + y_data = np.random.random([2]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10, 5] + + # batched matrix * batched matrix + x_data = np.random.random([10, 5, 2]).astype(np.float32) + y_data = np.random.random([10, 2, 5]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10, 5, 5] + + # batched matrix * broadcasted matrix + x_data = np.random.random([10, 1, 5, 2]).astype(np.float32) + y_data = np.random.random([1, 3, 2, 5]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10, 3, 5, 5] + + """ + if in_dygraph_mode(): + return _C_ops.final_state_matmul(x, y, transpose_x, transpose_y) + + if _in_legacy_dygraph(): + op_type = 'matmul_v2' + op = getattr(_C_ops, op_type) +> return op(x, y, 'trans_x', transpose_x, 'trans_y', transpose_y) +E RuntimeError: (NotFound) There are no kernels which are registered in the matmul_v2 operator. +E [Hint: Expected kernels_iter != all_op_kernels.end(), but received kernels_iter == all_op_kernels.end().] 
(at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/imperative/prepared_operator.cc:327) +E [operator < matmul_v2 > error] + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/linalg.py:145: RuntimeError +____________ test_metrics_matmul[second_operand1-expected_result1] _____________ + +second_operand = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [2, 2, 2]) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 12) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric([2, 2, 2]), tensor(12)), (tensor([2, 2, 2]), tensor(12))], + ) + def test_metrics_matmul(second_operand, expected_result): + first_metric = DummyMetric([2, 2, 2]) + + final_matmul = first_metric @ second_operand + + assert isinstance(final_matmul, CompositionalMetric) + + final_matmul.update() +> assert B.allclose(expected_result, final_matmul.compute()) + +test_composition.py:225: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) +../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [2, 2, 2]) +y = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [2, 2, 2]) +transpose_x = False, transpose_y = False, name = None + + def matmul(x, y, transpose_x=False, transpose_y=False, name=None): + """ + Applies matrix multiplication to two tensors. `matmul` follows + the complete broadcast rules, + and its behavior is consistent with `np.matmul`. + + Currently, the input tensors' number of dimensions can be any, `matmul` can be used to + achieve the `dot`, `matmul` and `batchmatmul`. + + The actual behavior depends on the shapes of :math:`x`, :math:`y` and the + flag values of :attr:`transpose_x`, :attr:`transpose_y`. Specifically: + + - If a transpose flag is specified, the last two dimensions of the tensor + are transposed. If the tensor is ndim-1 of shape, the transpose is invalid. If the tensor + is ndim-1 of shape :math:`[D]`, then for :math:`x` it is treated as :math:`[1, D]`, whereas + for :math:`y` it is the opposite: It is treated as :math:`[D, 1]`. + + The multiplication behavior depends on the dimensions of `x` and `y`. Specifically: + + - If both tensors are 1-dimensional, the dot product result is obtained. + + - If both tensors are 2-dimensional, the matrix-matrix product is obtained. + + - If the `x` is 1-dimensional and the `y` is 2-dimensional, + a `1` is prepended to its dimension in order to conduct the matrix multiply. + After the matrix multiply, the prepended dimension is removed. + + - If the `x` is 2-dimensional and `y` is 1-dimensional, + the matrix-vector product is obtained. + + - If both arguments are at least 1-dimensional and at least one argument + is N-dimensional (where N > 2), then a batched matrix multiply is obtained. + If the first argument is 1-dimensional, a 1 is prepended to its dimension + in order to conduct the batched matrix multiply and removed after. + If the second argument is 1-dimensional, a 1 is appended to its + dimension for the purpose of the batched matrix multiple and removed after. 
+ The non-matrix (exclude the last two dimensions) dimensions are + broadcasted according the broadcast rule. + For example, if input is a (j, 1, n, m) tensor and the other is a (k, m, p) tensor, + out will be a (j, k, n, p) tensor. + + Args: + x (Tensor): The input tensor which is a Tensor. + y (Tensor): The input tensor which is a Tensor. + transpose_x (bool): Whether to transpose :math:`x` before multiplication. + transpose_y (bool): Whether to transpose :math:`y` before multiplication. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + Tensor: The output Tensor. + + Examples: + + .. code-block:: python + + import paddle + import numpy as np + + # vector * vector + x_data = np.random.random([10]).astype(np.float32) + y_data = np.random.random([10]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [1] + + # matrix * vector + x_data = np.random.random([10, 5]).astype(np.float32) + y_data = np.random.random([5]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10] + + # batched matrix * broadcasted vector + x_data = np.random.random([10, 5, 2]).astype(np.float32) + y_data = np.random.random([2]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10, 5] + + # batched matrix * batched matrix + x_data = np.random.random([10, 5, 2]).astype(np.float32) + y_data = np.random.random([10, 2, 5]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10, 5, 5] + + # batched matrix * broadcasted matrix + x_data = np.random.random([10, 1, 5, 2]).astype(np.float32) + y_data = np.random.random([1, 3, 2, 5]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10, 3, 5, 5] + + """ + if in_dygraph_mode(): + return _C_ops.final_state_matmul(x, y, transpose_x, transpose_y) + + if _in_legacy_dygraph(): + op_type = 'matmul_v2' + op = getattr(_C_ops, op_type) +> return op(x, y, 'trans_x', transpose_x, 'trans_y', transpose_y) +E RuntimeError: (NotFound) There are no kernels which are registered in the matmul_v2 operator. +E [Hint: Expected kernels_iter != all_op_kernels.end(), but received kernels_iter == all_op_kernels.end().] 
(at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/imperative/prepared_operator.cc:327) +E [operator < matmul_v2 > error] + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/linalg.py:145: RuntimeError +______________ test_metrics_mod[second_operand0-expected_result0] ______________ + +second_operand = DummyMetric() +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1)), + (tensor(2), tensor(1)), + ], + ) + def test_metrics_mod(second_operand, expected_result): + first_metric = DummyMetric(5) + +> final_mod = first_metric % second_operand + +test_composition.py:240: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:616: in __mod__ + return CompositionalMetric(B.fmod, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('fmod',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'fmod' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +_____________________ test_metrics_mod[2-expected_result1] _____________________ + +second_operand = 2 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1)), + (tensor(2), tensor(1)), + ], + ) + def test_metrics_mod(second_operand, expected_result): + first_metric = DummyMetric(5) + +> final_mod = first_metric % second_operand + +test_composition.py:240: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:616: in __mod__ + return CompositionalMetric(B.fmod, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('fmod',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'fmod' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +____________________ test_metrics_mod[2.0-expected_result2] ____________________ + +second_operand = 2.0 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1)), + (tensor(2), tensor(1)), + ], + ) + def test_metrics_mod(second_operand, expected_result): + first_metric = DummyMetric(5) + +> final_mod = first_metric % second_operand + +test_composition.py:240: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:616: in __mod__ + return CompositionalMetric(B.fmod, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('fmod',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'fmod' + 
+../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +______________ test_metrics_mod[second_operand3-expected_result3] ______________ + +second_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1)), + (tensor(2), tensor(1)), + ], + ) + def test_metrics_mod(second_operand, expected_result): + first_metric = DummyMetric(5) + +> final_mod = first_metric % second_operand + +test_composition.py:240: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:616: in __mod__ + return CompositionalMetric(B.fmod, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('fmod',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'fmod' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +______________ test_metrics_mul[second_operand0-expected_result0] ______________ + +second_operand = DummyMetric() +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 4) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(4)), + (2, tensor(4)), + (2.0, tensor(4.0)), + pytest.param(tensor(2), tensor(4), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], + ) + def test_metrics_mul(second_operand, expected_result): + first_metric = DummyMetric(2) + +> final_mul = first_metric * second_operand + +test_composition.py:261: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:619: in __mul__ + return CompositionalMetric(B.mul, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('mul',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'mul' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +_____________________ test_metrics_mul[2-expected_result1] _____________________ + +second_operand = 2 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 4) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(4)), + (2, tensor(4)), + (2.0, tensor(4.0)), + pytest.param(tensor(2), tensor(4), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], + ) + def test_metrics_mul(second_operand, expected_result): + first_metric = DummyMetric(2) + +> final_mul = first_metric * second_operand + +test_composition.py:261: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:619: in __mul__ + return CompositionalMetric(B.mul, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('mul',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' 
object has no attribute 'mul' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +____________________ test_metrics_mul[2.0-expected_result2] ____________________ + +second_operand = 2.0 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 4.) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(4)), + (2, tensor(4)), + (2.0, tensor(4.0)), + pytest.param(tensor(2), tensor(4), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], + ) + def test_metrics_mul(second_operand, expected_result): + first_metric = DummyMetric(2) + +> final_mul = first_metric * second_operand + +test_composition.py:261: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:619: in __mul__ + return CompositionalMetric(B.mul, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('mul',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'mul' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +______________ test_metrics_mul[second_operand3-expected_result3] ______________ + +second_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 4) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(4)), + (2, tensor(4)), + (2.0, tensor(4.0)), + pytest.param(tensor(2), tensor(4), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], + ) + def test_metrics_mul(second_operand, expected_result): + first_metric = DummyMetric(2) + +> final_mul = first_metric * second_operand + +test_composition.py:261: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:619: in __mul__ + return CompositionalMetric(B.mul, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('mul',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'mul' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +______________ test_metrics_ne[second_operand0-expected_result0] _______________ + +second_operand = DummyMetric() +expected_result = Tensor(shape=[], dtype=bool, place=Place(cpu), stop_gradient=True, + False) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(False)), + (2, tensor(False)), + (2.0, tensor(False)), + (tensor(2), tensor(False)), + ], + ) + def test_metrics_ne(second_operand, expected_result): + first_metric = DummyMetric(2) + +> final_ne = first_metric != second_operand + +test_composition.py:285: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:623: in __ne__ + return CompositionalMetric(B.ne, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('ne',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = 
object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'ne' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +_____________________ test_metrics_ne[2-expected_result1] ______________________ + +second_operand = 2 +expected_result = Tensor(shape=[], dtype=bool, place=Place(cpu), stop_gradient=True, + False) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(False)), + (2, tensor(False)), + (2.0, tensor(False)), + (tensor(2), tensor(False)), + ], + ) + def test_metrics_ne(second_operand, expected_result): + first_metric = DummyMetric(2) + +> final_ne = first_metric != second_operand + +test_composition.py:285: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:623: in __ne__ + return CompositionalMetric(B.ne, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('ne',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'ne' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +____________________ test_metrics_ne[2.0-expected_result2] _____________________ + +second_operand = 2.0 +expected_result = Tensor(shape=[], dtype=bool, place=Place(cpu), stop_gradient=True, + False) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(False)), + (2, tensor(False)), + (2.0, tensor(False)), + (tensor(2), tensor(False)), + ], + ) + def test_metrics_ne(second_operand, expected_result): + first_metric = DummyMetric(2) + +> final_ne = first_metric != second_operand + +test_composition.py:285: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:623: in __ne__ + return CompositionalMetric(B.ne, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('ne',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'ne' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +______________ test_metrics_ne[second_operand3-expected_result3] _______________ + +second_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +expected_result = Tensor(shape=[], dtype=bool, place=Place(cpu), stop_gradient=True, + False) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(False)), + (2, tensor(False)), + (2.0, tensor(False)), + (tensor(2), tensor(False)), + ], + ) + def test_metrics_ne(second_operand, expected_result): + first_metric = DummyMetric(2) + +> final_ne = first_metric != second_operand + +test_composition.py:285: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:623: in __ne__ + return CompositionalMetric(B.ne, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('ne',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E 
AttributeError: 'module' object has no attribute 'ne' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +______________ test_metrics_or[second_operand1-expected_result1] _______________ + +second_operand = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [1, 0, 3]) +expected_result = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [-1, -2, 3]) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric([1, 0, 3]), tensor([-1, -2, 3])), (tensor([1, 0, 3]), tensor([-1, -2, 3]))], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_or(second_operand, expected_result): + first_metric = DummyMetric([-1, -2, 3]) + + final_or = first_metric | second_operand +> final_ror = second_operand | first_metric + +test_composition.py:303: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../../../../../torch2paddle/paddleext/torchapi/tensor_.py:357: in __or__ + return paddle.logical_or(self.bool(), other.bool()) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = DummyMetric(), name = 'bool' + + def __getattr__(self, name): + if '_parameters' in self.__dict__: + _parameters = self.__dict__['_parameters'] + if name in self._parameters: + if in_declarative_mode(): + return _convert_into_variable(self._parameters[name]) + return self._parameters[name] + if '_sub_layers' in self.__dict__: + _sub_layers = self.__dict__['_sub_layers'] + if name in self._sub_layers: + return self._sub_layers[name] + if '_buffers' in self.__dict__: + _buffers = self.__dict__['_buffers'] + if name in _buffers: + if in_declarative_mode(): + return _convert_into_variable(_buffers[name]) + return _buffers[name] +> return object.__getattribute__(self, name) +E AttributeError: 'DummyMetric' object has no attribute 'bool' + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/layers.py:1123: AttributeError +__________________ test_metrics_rfloordiv[5-expected_result0] __________________ + +first_operand = 5 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [(5, tensor(2)), (5.0, tensor(2.0)), (tensor(5), tensor(2))], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_rfloordiv(first_operand, expected_result): + second_operand = DummyMetric(2) + + final_rfloordiv = first_operand // second_operand + + assert isinstance(final_rfloordiv, CompositionalMetric) + + final_rfloordiv.update() +> assert B.allclose(expected_result, final_rfloordiv.compute()) + +test_composition.py:347: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) +../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:460: in floor_divide + return _elementwise_op_in_dygraph( +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/decorator.py:232: in fun + return caller(func, *(extras + args), **kw) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/wrapped_decorator.py:25: in __impl__ + return wrapped_func(*args, **kwargs) 
+/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/framework.py:434: in __impl__ + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = 5 +y = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +axis = -1, act = None, use_mkldnn = False, op_name = 'elementwise_floordiv' + + @dygraph_only + def _elementwise_op_in_dygraph(x, + y, + axis=-1, + act=None, + use_mkldnn=False, + op_name=None): + def is_inplace(op_name): + return op_name[-1] == "_" + + if op_name not in OP_NAMEMAPPING.keys(): + op = getattr(_C_ops, op_name) + out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) + else: + if in_dygraph_mode(): + op = getattr(_C_ops, OP_NAMEMAPPING[op_name] if not is_inplace(op_name) else op_name) + out = op(x, y) + + if _in_legacy_dygraph(): + op = getattr(_C_ops, op_name) +> out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) +E ValueError: (InvalidArgument) elementwise_floordiv(): argument 'X' (position 0) must be Tensor, but got int (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:737) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:218: ValueError +_________________ test_metrics_rfloordiv[5.0-expected_result1] _________________ + +first_operand = 5.0 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [(5, tensor(2)), (5.0, tensor(2.0)), (tensor(5), tensor(2))], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_rfloordiv(first_operand, expected_result): + second_operand = DummyMetric(2) + + final_rfloordiv = first_operand // second_operand + + assert isinstance(final_rfloordiv, CompositionalMetric) + + final_rfloordiv.update() +> assert B.allclose(expected_result, final_rfloordiv.compute()) + +test_composition.py:347: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) +../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:460: in floor_divide + return _elementwise_op_in_dygraph( +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/decorator.py:232: in fun + return caller(func, *(extras + args), **kw) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/wrapped_decorator.py:25: in __impl__ + return wrapped_func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/framework.py:434: in __impl__ + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = 5.0 +y = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +axis = -1, act = None, use_mkldnn = False, op_name = 'elementwise_floordiv' + + @dygraph_only + def _elementwise_op_in_dygraph(x, + y, + axis=-1, + act=None, + use_mkldnn=False, + op_name=None): + def is_inplace(op_name): + return op_name[-1] == "_" + + if op_name not in OP_NAMEMAPPING.keys(): + op = getattr(_C_ops, op_name) + out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) + else: + if in_dygraph_mode(): + op = getattr(_C_ops, OP_NAMEMAPPING[op_name] if not is_inplace(op_name) else op_name) + out = 
op(x, y) + + if _in_legacy_dygraph(): + op = getattr(_C_ops, op_name) +> out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) +E ValueError: (InvalidArgument) elementwise_floordiv(): argument 'X' (position 0) must be Tensor, but got float (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:737) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:218: ValueError +___________ test_metrics_rfloordiv[first_operand2-expected_result2] ____________ + +first_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 5) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [(5, tensor(2)), (5.0, tensor(2.0)), (tensor(5), tensor(2))], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_rfloordiv(first_operand, expected_result): + second_operand = DummyMetric(2) + +> final_rfloordiv = first_operand // second_operand + +test_composition.py:342: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:257: in __impl__ + other_var = create_scalar( +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:90: in create_scalar + return create_tensor(value, dtype, shape=[1]) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/decorator.py:232: in fun + return caller(func, *(extras + args), **kw) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/base.py:299: in __impl__ + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +value = DummyMetric(), dtype = paddle.int32, shape = [1] + + @no_grad + def create_tensor(value, dtype, shape): + out = _varbase_creator(dtype=dtype) +> out = _C_ops.fill_constant(out, 'dtype', dtype, 'shape', shape, 'value', + value, 'force_cpu', False) +E ValueError: (InvalidArgument) fill_constant(): argument (position 6) must be float, but got DummyMetric (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:189) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:84: ValueError +____________ test_metrics_rmatmul[first_operand0-expected_result0] _____________ + +first_operand = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [2, 2, 2]) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 12) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [pytest.param(tensor([2, 2, 2]), tensor(12), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4))], + ) + def test_metrics_rmatmul(first_operand, expected_result): + second_operand = DummyMetric([2, 2, 2]) + +> final_rmatmul = first_operand @ second_operand + +test_composition.py:357: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:257: in __impl__ + other_var = create_scalar( +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:90: in create_scalar + return create_tensor(value, dtype, shape=[1]) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/decorator.py:232: in fun + return caller(func, *(extras + args), **kw) 
+/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/base.py:299: in __impl__ + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +value = DummyMetric(), dtype = paddle.int64, shape = [1] + + @no_grad + def create_tensor(value, dtype, shape): + out = _varbase_creator(dtype=dtype) +> out = _C_ops.fill_constant(out, 'dtype', dtype, 'shape', shape, 'value', + value, 'force_cpu', False) +E ValueError: (InvalidArgument) fill_constant(): argument (position 6) must be float, but got DummyMetric (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:189) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:84: ValueError +______________ test_metrics_rmod[first_operand0-expected_result0] ______________ + +first_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [pytest.param(tensor(2), tensor(2), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4))], + ) + def test_metrics_rmod(first_operand, expected_result): + second_operand = DummyMetric(5) + +> final_rmod = first_operand % second_operand + +test_composition.py:372: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:257: in __impl__ + other_var = create_scalar( +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:90: in create_scalar + return create_tensor(value, dtype, shape=[1]) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/decorator.py:232: in fun + return caller(func, *(extras + args), **kw) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/base.py:299: in __impl__ + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +value = DummyMetric(), dtype = paddle.int32, shape = [1] + + @no_grad + def create_tensor(value, dtype, shape): + out = _varbase_creator(dtype=dtype) +> out = _C_ops.fill_constant(out, 'dtype', dtype, 'shape', shape, 'value', + value, 'force_cpu', False) +E ValueError: (InvalidArgument) fill_constant(): argument (position 6) must be float, but got DummyMetric (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:189) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:84: ValueError +____________________ test_metrics_rpow[2-expected_result1] _____________________ + +first_operand = 2 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 4) + + @pytest.mark.parametrize( + "first_operand,expected_result", + [ + pytest.param(DummyMetric(2), tensor(4)), + pytest.param(2, tensor(4)), + pytest.param(2.0, tensor(4.0), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_6)), + ], + ) + def test_metrics_rpow(first_operand, expected_result): + second_operand = DummyMetric(2) + + final_rpow = first_operand ** second_operand + + assert isinstance(final_rpow, CompositionalMetric) + final_rpow.update() +> assert B.allclose(expected_result, final_rpow.compute()) + +test_composition.py:395: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + 
self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) +../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:168: in pow + return _elementwise_op_in_dygraph( +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/decorator.py:232: in fun + return caller(func, *(extras + args), **kw) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/wrapped_decorator.py:25: in __impl__ + return wrapped_func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/framework.py:434: in __impl__ + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = 2 +y = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +axis = -1, act = None, use_mkldnn = False, op_name = 'elementwise_pow' + + @dygraph_only + def _elementwise_op_in_dygraph(x, + y, + axis=-1, + act=None, + use_mkldnn=False, + op_name=None): + def is_inplace(op_name): + return op_name[-1] == "_" + + if op_name not in OP_NAMEMAPPING.keys(): + op = getattr(_C_ops, op_name) + out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) + else: + if in_dygraph_mode(): + op = getattr(_C_ops, OP_NAMEMAPPING[op_name] if not is_inplace(op_name) else op_name) + out = op(x, y) + + if _in_legacy_dygraph(): + op = getattr(_C_ops, op_name) +> out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) +E ValueError: (InvalidArgument) elementwise_pow(): argument 'X' (position 0) must be Tensor, but got int (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:737) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:218: ValueError +___________________ test_metrics_rpow[2.0-expected_result2] ____________________ + +first_operand = 2.0 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 4.) 
+ + @pytest.mark.parametrize( + "first_operand,expected_result", + [ + pytest.param(DummyMetric(2), tensor(4)), + pytest.param(2, tensor(4)), + pytest.param(2.0, tensor(4.0), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_6)), + ], + ) + def test_metrics_rpow(first_operand, expected_result): + second_operand = DummyMetric(2) + + final_rpow = first_operand ** second_operand + + assert isinstance(final_rpow, CompositionalMetric) + final_rpow.update() +> assert B.allclose(expected_result, final_rpow.compute()) + +test_composition.py:395: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) +../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:168: in pow + return _elementwise_op_in_dygraph( +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/decorator.py:232: in fun + return caller(func, *(extras + args), **kw) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/wrapped_decorator.py:25: in __impl__ + return wrapped_func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/framework.py:434: in __impl__ + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = 2.0 +y = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +axis = -1, act = None, use_mkldnn = False, op_name = 'elementwise_pow' + + @dygraph_only + def _elementwise_op_in_dygraph(x, + y, + axis=-1, + act=None, + use_mkldnn=False, + op_name=None): + def is_inplace(op_name): + return op_name[-1] == "_" + + if op_name not in OP_NAMEMAPPING.keys(): + op = getattr(_C_ops, op_name) + out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) + else: + if in_dygraph_mode(): + op = getattr(_C_ops, OP_NAMEMAPPING[op_name] if not is_inplace(op_name) else op_name) + out = op(x, y) + + if _in_legacy_dygraph(): + op = getattr(_C_ops, op_name) +> out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) +E ValueError: (InvalidArgument) elementwise_pow(): argument 'X' (position 0) must be Tensor, but got float (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:737) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:218: ValueError +______________ test_metrics_rsub[first_operand0-expected_result0] ______________ + +first_operand = DummyMetric() +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(3), tensor(1)), + (3, tensor(1)), + (3.0, tensor(1.0)), + pytest.param(tensor(3), tensor(1), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], + ) + def test_metrics_rsub(first_operand, expected_result): + second_operand = DummyMetric(2) + +> final_rsub = first_operand - second_operand + +test_composition.py:410: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:666: in __sub__ + return CompositionalMetric(B.sub, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('sub',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = 
object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'sub' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +____________________ test_metrics_rsub[3-expected_result1] _____________________ + +first_operand = 3 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(3), tensor(1)), + (3, tensor(1)), + (3.0, tensor(1.0)), + pytest.param(tensor(3), tensor(1), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], + ) + def test_metrics_rsub(first_operand, expected_result): + second_operand = DummyMetric(2) + +> final_rsub = first_operand - second_operand + +test_composition.py:410: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:657: in __rsub__ + return CompositionalMetric(B.sub, other, self) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('sub',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'sub' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +___________________ test_metrics_rsub[3.0-expected_result2] ____________________ + +first_operand = 3.0 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 1.) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(3), tensor(1)), + (3, tensor(1)), + (3.0, tensor(1.0)), + pytest.param(tensor(3), tensor(1), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], + ) + def test_metrics_rsub(first_operand, expected_result): + second_operand = DummyMetric(2) + +> final_rsub = first_operand - second_operand + +test_composition.py:410: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:657: in __rsub__ + return CompositionalMetric(B.sub, other, self) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('sub',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'sub' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +______________ test_metrics_rsub[first_operand3-expected_result3] ______________ + +first_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 3) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(3), tensor(1)), + (3, tensor(1)), + (3.0, tensor(1.0)), + pytest.param(tensor(3), tensor(1), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], + ) + def test_metrics_rsub(first_operand, expected_result): + second_operand = DummyMetric(2) + + final_rsub = first_operand - second_operand + + assert isinstance(final_rsub, CompositionalMetric) + final_rsub.update() +> assert B.allclose(expected_result, final_rsub.compute()) +E assert Tensor(shape=[1], dtype=bool, place=Place(cpu), stop_gradient=True,\n [False]) +E + where Tensor(shape=[1], dtype=bool, place=Place(cpu), stop_gradient=True,\n [False]) = 
(Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True,\n 1), 5) +E + where = B.allclose +E + and 5 = () +E + where = CompositionalMetric(\n add(\n 3,\n DummyMetric()\n )\n).compute + +test_composition.py:414: AssertionError +____________ test_metrics_rtruediv[first_operand0-expected_result0] ____________ + +first_operand = DummyMetric() +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(6), tensor(2.0)), + (6, tensor(2.0)), + (6.0, tensor(2.0)), + (tensor(6), tensor(2.0)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_rtruediv(first_operand, expected_result): + second_operand = DummyMetric(3) + +> final_rtruediv = first_operand / second_operand + +test_composition.py:430: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:669: in __truediv__ + return CompositionalMetric(B.true_divide, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +args = ('true_divide',), kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'true_divide' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +__________________ test_metrics_rtruediv[6-expected_result1] ___________________ + +first_operand = 6 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(6), tensor(2.0)), + (6, tensor(2.0)), + (6.0, tensor(2.0)), + (tensor(6), tensor(2.0)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_rtruediv(first_operand, expected_result): + second_operand = DummyMetric(3) + +> final_rtruediv = first_operand / second_operand + +test_composition.py:430: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:660: in __rtruediv__ + return CompositionalMetric(B.true_divide, other, self) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +args = ('true_divide',), kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'true_divide' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +_________________ test_metrics_rtruediv[6.0-expected_result2] __________________ + +first_operand = 6.0 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) 
+ + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(6), tensor(2.0)), + (6, tensor(2.0)), + (6.0, tensor(2.0)), + (tensor(6), tensor(2.0)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_rtruediv(first_operand, expected_result): + second_operand = DummyMetric(3) + +> final_rtruediv = first_operand / second_operand + +test_composition.py:430: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:660: in __rtruediv__ + return CompositionalMetric(B.true_divide, other, self) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +args = ('true_divide',), kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'true_divide' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +____________ test_metrics_rtruediv[first_operand3-expected_result3] ____________ + +first_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 6) +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(6), tensor(2.0)), + (6, tensor(2.0)), + (6.0, tensor(2.0)), + (tensor(6), tensor(2.0)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_rtruediv(first_operand, expected_result): + second_operand = DummyMetric(3) + + final_rtruediv = first_operand / second_operand + + assert isinstance(final_rtruediv, CompositionalMetric) + final_rtruediv.update() +> assert B.allclose(expected_result, final_rtruediv.compute()) +E assert Tensor(shape=[1], dtype=bool, place=Place(cpu), stop_gradient=True,\n [False]) +E + where Tensor(shape=[1], dtype=bool, place=Place(cpu), stop_gradient=True,\n [False]) = (Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,\n 2.), 9) +E + where = B.allclose +E + and 9 = () +E + where = CompositionalMetric(\n add(\n 6,\n DummyMetric()\n )\n).compute + +test_composition.py:434: AssertionError +______________ test_metrics_sub[second_operand0-expected_result0] ______________ + +second_operand = DummyMetric() +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1.0)), + (tensor(2), tensor(1)), + ], + ) + def test_metrics_sub(second_operand, expected_result): + first_metric = DummyMetric(3) + +> final_sub = first_metric - second_operand + +test_composition.py:449: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:666: in __sub__ + return CompositionalMetric(B.sub, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('sub',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'sub' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +_____________________ test_metrics_sub[2-expected_result1] _____________________ + +second_operand = 2 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), 
stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1.0)), + (tensor(2), tensor(1)), + ], + ) + def test_metrics_sub(second_operand, expected_result): + first_metric = DummyMetric(3) + +> final_sub = first_metric - second_operand + +test_composition.py:449: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:666: in __sub__ + return CompositionalMetric(B.sub, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('sub',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'sub' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +____________________ test_metrics_sub[2.0-expected_result2] ____________________ + +second_operand = 2.0 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 1.) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1.0)), + (tensor(2), tensor(1)), + ], + ) + def test_metrics_sub(second_operand, expected_result): + first_metric = DummyMetric(3) + +> final_sub = first_metric - second_operand + +test_composition.py:449: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:666: in __sub__ + return CompositionalMetric(B.sub, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('sub',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'sub' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +______________ test_metrics_sub[second_operand3-expected_result3] ______________ + +second_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1.0)), + (tensor(2), tensor(1)), + ], + ) + def test_metrics_sub(second_operand, expected_result): + first_metric = DummyMetric(3) + +> final_sub = first_metric - second_operand + +test_composition.py:449: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:666: in __sub__ + return CompositionalMetric(B.sub, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('sub',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'sub' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +____________ test_metrics_truediv[second_operand0-expected_result0] ____________ + +second_operand = DummyMetric() +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) 
+ + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(3), tensor(2.0)), + (3, tensor(2.0)), + (3.0, tensor(2.0)), + (tensor(3), tensor(2.0)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_truediv(second_operand, expected_result): + first_metric = DummyMetric(6) + +> final_truediv = first_metric / second_operand + +test_composition.py:469: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:669: in __truediv__ + return CompositionalMetric(B.true_divide, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +args = ('true_divide',), kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'true_divide' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +___________________ test_metrics_truediv[3-expected_result1] ___________________ + +second_operand = 3 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(3), tensor(2.0)), + (3, tensor(2.0)), + (3.0, tensor(2.0)), + (tensor(3), tensor(2.0)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_truediv(second_operand, expected_result): + first_metric = DummyMetric(6) + +> final_truediv = first_metric / second_operand + +test_composition.py:469: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:669: in __truediv__ + return CompositionalMetric(B.true_divide, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +args = ('true_divide',), kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'true_divide' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +__________________ test_metrics_truediv[3.0-expected_result2] __________________ + +second_operand = 3.0 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) 
+ + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(3), tensor(2.0)), + (3, tensor(2.0)), + (3.0, tensor(2.0)), + (tensor(3), tensor(2.0)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_truediv(second_operand, expected_result): + first_metric = DummyMetric(6) + +> final_truediv = first_metric / second_operand + +test_composition.py:469: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:669: in __truediv__ + return CompositionalMetric(B.true_divide, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +args = ('true_divide',), kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'true_divide' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +____________ test_metrics_truediv[second_operand3-expected_result3] ____________ + +second_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 3) +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(3), tensor(2.0)), + (3, tensor(2.0)), + (3.0, tensor(2.0)), + (tensor(3), tensor(2.0)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_truediv(second_operand, expected_result): + first_metric = DummyMetric(6) + +> final_truediv = first_metric / second_operand + +test_composition.py:469: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:669: in __truediv__ + return CompositionalMetric(B.true_divide, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +args = ('true_divide',), kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'true_divide' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +______________ test_metrics_xor[second_operand1-expected_result1] ______________ + +second_operand = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [1, 0, 3]) +expected_result = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [-2, -2, 0]) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric([1, 0, 3]), tensor([-2, -2, 0])), (tensor([1, 0, 3]), tensor([-2, -2, 0]))], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_xor(second_operand, expected_result): + first_metric = DummyMetric([-1, -2, 3]) + + final_xor = first_metric ^ second_operand +> final_rxor = second_operand ^ first_metric + +test_composition.py:485: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/logic.py:616: in bitwise_xor + return _bitwise_op( +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +op_name = 'bitwise_xor' +x = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [1, 0, 3]) +y = DummyMetric(), out = None, name = None, binary_op = True + + def _bitwise_op(op_name, x, y, out=None, name=None, 
binary_op=True): + if paddle.in_dynamic_mode(): + op = getattr(_C_ops, op_name) + if binary_op: +> return op(x, y) +E ValueError: (InvalidArgument) bitwise_xor(): argument 'Y' (position 1) must be Tensor, but got DummyMetric (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:737) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/logic.py:505: ValueError +_______________ test_metrics_getitem[value0-1-expected_result0] ________________ + +value = [1, 2, 3], idx = 1 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) + + @pytest.mark.parametrize( + ["value", "idx", "expected_result"], + [([1, 2, 3], 1, tensor(2)), ([[0, 1], [2, 3]], (1, 0), tensor(2)), ([[0, 1], [2, 3]], 1, tensor([2, 3]))], + ) + def test_metrics_getitem(value, idx, expected_result): + first_metric = DummyMetric(value) + + final_getitem = first_metric[idx] + assert isinstance(final_getitem, CompositionalMetric) + final_getitem.update() +> assert B.allclose(expected_result, final_getitem.compute()) + +test_composition.py:543: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../../../../../torch2paddle/paddleext/torchapi/functional.py:308: in allclose + return paddle.allclose(input.float(), other.float(), rtol=rtol, atol=atol, equal_nan=equal_nan, name=name) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) +y = Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + [2.]) +rtol = 1e-05, atol = 1e-08, equal_nan = False, name = None + + @templatedoc() + def allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name=None): + """ + ${comment} + + Args: + x(Tensor): ${input_comment}. + y(Tensor): ${other_comment}. + rtol(rtoltype, optional): The relative tolerance. Default: :math:`1e-5` . + atol(atoltype, optional): The absolute tolerance. Default: :math:`1e-8` . + equal_nan(equalnantype, optional): ${equal_nan_comment}. + name (str, optional): Name for the operation. For more information, please + refer to :ref:`api_guide_Name`. Default: None. + + Returns: + Tensor: ${out_comment}. + + Raises: + TypeError: The data type of ``x`` must be one of float32, float64. + TypeError: The data type of ``y`` must be one of float32, float64. + TypeError: The type of ``rtol`` must be float. + TypeError: The type of ``atol`` must be float. + TypeError: The type of ``equal_nan`` must be bool. + + Examples: + .. code-block:: python + + import paddle + + x = paddle.to_tensor([10000., 1e-07]) + y = paddle.to_tensor([10000.1, 1e-08]) + result1 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, + equal_nan=False, name="ignore_nan") + np_result1 = result1.numpy() + # [False] + result2 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, + equal_nan=True, name="equal_nan") + np_result2 = result2.numpy() + # [False] + + x = paddle.to_tensor([1.0, float('nan')]) + y = paddle.to_tensor([1.0, float('nan')]) + result1 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, + equal_nan=False, name="ignore_nan") + np_result1 = result1.numpy() + # [False] + result2 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, + equal_nan=True, name="equal_nan") + np_result2 = result2.numpy() + # [True] + """ + + if in_dygraph_mode(): + # NOTE(dev): Pass tol as Tensor to fix precision loss problem, because + # C++ backend will cast it into float32 if passing float from python. 
+ as_tensor = lambda x: paddle.to_tensor([x], dtype='float64', place='cpu') + return _C_ops.final_state_allclose(x, y, + as_tensor(rtol), + as_tensor(atol), equal_nan) + if _in_legacy_dygraph(): +> return _C_ops.allclose(x, y, 'rtol', + str(rtol), 'atol', + str(atol), 'equal_nan', equal_nan) +E RuntimeError: (PreconditionNotMet) Input(Input) and Input(Other) must have the same dimension size. +E [Hint: Expected input_dim.size() == other_dim.size(), but received input_dim.size():0 != other_dim.size():1.] (at /Users/paddle/work/pengyuqi/Paddle/paddle/phi/infermeta/binary.cc:39) +E [operator < allclose > error] + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/logic.py:140: RuntimeError +______________ test_metrics_getitem[value1-idx1-expected_result1] ______________ + +value = [[0, 1], [2, 3]], idx = (1, 0) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) + + @pytest.mark.parametrize( + ["value", "idx", "expected_result"], + [([1, 2, 3], 1, tensor(2)), ([[0, 1], [2, 3]], (1, 0), tensor(2)), ([[0, 1], [2, 3]], 1, tensor([2, 3]))], + ) + def test_metrics_getitem(value, idx, expected_result): + first_metric = DummyMetric(value) + + final_getitem = first_metric[idx] + assert isinstance(final_getitem, CompositionalMetric) + final_getitem.update() +> assert B.allclose(expected_result, final_getitem.compute()) + +test_composition.py:543: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../../../../../torch2paddle/paddleext/torchapi/functional.py:308: in allclose + return paddle.allclose(input.float(), other.float(), rtol=rtol, atol=atol, equal_nan=equal_nan, name=name) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) +y = Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + [2.]) +rtol = 1e-05, atol = 1e-08, equal_nan = False, name = None + + @templatedoc() + def allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name=None): + """ + ${comment} + + Args: + x(Tensor): ${input_comment}. + y(Tensor): ${other_comment}. + rtol(rtoltype, optional): The relative tolerance. Default: :math:`1e-5` . + atol(atoltype, optional): The absolute tolerance. Default: :math:`1e-8` . + equal_nan(equalnantype, optional): ${equal_nan_comment}. + name (str, optional): Name for the operation. For more information, please + refer to :ref:`api_guide_Name`. Default: None. + + Returns: + Tensor: ${out_comment}. + + Raises: + TypeError: The data type of ``x`` must be one of float32, float64. + TypeError: The data type of ``y`` must be one of float32, float64. + TypeError: The type of ``rtol`` must be float. + TypeError: The type of ``atol`` must be float. + TypeError: The type of ``equal_nan`` must be bool. + + Examples: + .. 
code-block:: python + + import paddle + + x = paddle.to_tensor([10000., 1e-07]) + y = paddle.to_tensor([10000.1, 1e-08]) + result1 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, + equal_nan=False, name="ignore_nan") + np_result1 = result1.numpy() + # [False] + result2 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, + equal_nan=True, name="equal_nan") + np_result2 = result2.numpy() + # [False] + + x = paddle.to_tensor([1.0, float('nan')]) + y = paddle.to_tensor([1.0, float('nan')]) + result1 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, + equal_nan=False, name="ignore_nan") + np_result1 = result1.numpy() + # [False] + result2 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, + equal_nan=True, name="equal_nan") + np_result2 = result2.numpy() + # [True] + """ + + if in_dygraph_mode(): + # NOTE(dev): Pass tol as Tensor to fix precision loss problem, because + # C++ backend will cast it into float32 if passing float from python. + as_tensor = lambda x: paddle.to_tensor([x], dtype='float64', place='cpu') + return _C_ops.final_state_allclose(x, y, + as_tensor(rtol), + as_tensor(atol), equal_nan) + if _in_legacy_dygraph(): +> return _C_ops.allclose(x, y, 'rtol', + str(rtol), 'atol', + str(atol), 'equal_nan', equal_nan) +E RuntimeError: (PreconditionNotMet) Input(Input) and Input(Other) must have the same dimension size. +E [Hint: Expected input_dim.size() == other_dim.size(), but received input_dim.size():0 != other_dim.size():1.] (at /Users/paddle/work/pengyuqi/Paddle/paddle/phi/infermeta/binary.cc:39) +E [operator < allclose > error] + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/logic.py:140: RuntimeError +=============================== warnings summary =============================== +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:19 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:19: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + DESCRIPTOR = _descriptor.FileDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:33 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:33: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:37 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:37: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:41 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:41: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. 
+ _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:45 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:45: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:49 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:49: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:53 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:53: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:57 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:57: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:61 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:61: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:65 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:65: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:69 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:69: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:73 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:73: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. 
Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:77 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:77: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:81 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:81: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:27 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:27: DeprecationWarning: Call to deprecated create function EnumDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _ATTRTYPE = _descriptor.EnumDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:115 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:115: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:119 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:119: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:123 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:123: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:127 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:127: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. 
+ _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:131 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:131: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:135 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:135: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:139 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:139: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:143 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:143: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:147 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:147: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:151 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:151: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:155 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:155: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:159 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:159: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. 
Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:163 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:163: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:167 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:167: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:171 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:171: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:175 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:175: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:179 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:179: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:183 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:183: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:187 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:187: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. 
+ _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:191 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:191: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:195 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:195: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:199 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:199: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:203 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:203: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:207 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:207: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:211 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:211: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:215 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:215: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:219 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:219: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. 
Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:223 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:223: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:227 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:227: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:109 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:109: DeprecationWarning: Call to deprecated create function EnumDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _VARTYPE_TYPE = _descriptor.EnumDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:247 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:247: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:240 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:240: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _VERSION = _descriptor.Descriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:278 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:278: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:285 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:285: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:292 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:292: DeprecationWarning: Call to deprecated create function FieldDescriptor(). 
Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:299 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:299: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:271 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:271: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _PROCESSMESHDESC = _descriptor.Descriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:330 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:330: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:337 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:337: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:344 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:344: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:351 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:351: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:358 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:358: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. 
+ _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:365 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:365: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:372 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:372: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:379 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:379: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:386 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:386: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:393 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:393: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:400 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:400: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:407 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:407: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:414 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:414: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. 
Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:421 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:421: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:428 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:428: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:323 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:323: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _OPDESC_ATTR = _descriptor.Descriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:458 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:458: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:465 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:465: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:451 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:451: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _OPDESC_VAR = _descriptor.Descriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:495 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:495: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:502 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:502: DeprecationWarning: Call to deprecated create function FieldDescriptor(). 
Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:509 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:509: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:516 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:516: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:523 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:523: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:488 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:488: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _OPDESC = _descriptor.Descriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:554 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:554: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:561 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:561: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:568 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:568: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. 
+ _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:575 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:575: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:582 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:582: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:589 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:589: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:596 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:596: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:547 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:547: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _OPPROTO_VAR = _descriptor.Descriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:626 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:626: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:633 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:633: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:640 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:640: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. 
Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:647 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:647: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:654 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:654: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:661 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:661: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:619 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:619: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _OPPROTO_ATTR = _descriptor.Descriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:691 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:691: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:698 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:698: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:705 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:705: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:712 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:712: DeprecationWarning: Call to deprecated create function FieldDescriptor(). 
Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:719 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:719: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:684 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:684: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _OPPROTO = _descriptor.Descriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:750 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:750: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:757 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:757: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/comet_ml/monkey_patching.py:19 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/comet_ml/monkey_patching.py:19: DeprecationWarning: the imp module is deprecated in favour of importlib; see the module's documentation for alternative uses + import imp + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/framework.py:1104 +test_composition.py::test_metrics_and[second_operand3-expected_result3] +test_composition.py::test_metrics_or[second_operand1-expected_result1] + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/framework.py:1104: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here. + Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations + elif dtype == np.bool: + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/tensor/creation.py:125: 1 warning +test_composition.py: 10 warnings + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/creation.py:125: DeprecationWarning: `np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself. Doing this will not modify any behavior and is safe. 
+ Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations + if data.dtype == np.object: + +test_composition.py::test_metrics_eq[2.0-expected_result2] +test_composition.py::test_metrics_ge[2.0-expected_result2] +test_composition.py::test_metrics_gt[2.0-expected_result2] +test_composition.py::test_metrics_le[2.0-expected_result2] +test_composition.py::test_metrics_lt[2.0-expected_result2] + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:276: UserWarning: The dtype of left and right variables are not the same, left dtype is paddle.int32, but right dtype is paddle.float32, the right dtype will convert to paddle.int32 + warnings.warn( + +-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html +=========================== short test summary info ============================ +FAILED test_composition.py::test_metrics_and[3-expected_result1] - ValueError... +FAILED test_composition.py::test_metrics_and[3-expected_result2] - ValueError... +FAILED test_composition.py::test_metrics_and[second_operand3-expected_result3] +FAILED test_composition.py::test_metrics_floordiv[2-expected_result1] - Value... +FAILED test_composition.py::test_metrics_floordiv[2.0-expected_result2] - Val... +FAILED test_composition.py::test_metrics_matmul[second_operand0-expected_result0] +FAILED test_composition.py::test_metrics_matmul[second_operand1-expected_result1] +FAILED test_composition.py::test_metrics_mod[second_operand0-expected_result0] +FAILED test_composition.py::test_metrics_mod[2-expected_result1] - AttributeE... +FAILED test_composition.py::test_metrics_mod[2.0-expected_result2] - Attribut... +FAILED test_composition.py::test_metrics_mod[second_operand3-expected_result3] +FAILED test_composition.py::test_metrics_mul[second_operand0-expected_result0] +FAILED test_composition.py::test_metrics_mul[2-expected_result1] - AttributeE... +FAILED test_composition.py::test_metrics_mul[2.0-expected_result2] - Attribut... +FAILED test_composition.py::test_metrics_mul[second_operand3-expected_result3] +FAILED test_composition.py::test_metrics_ne[second_operand0-expected_result0] +FAILED test_composition.py::test_metrics_ne[2-expected_result1] - AttributeEr... +FAILED test_composition.py::test_metrics_ne[2.0-expected_result2] - Attribute... +FAILED test_composition.py::test_metrics_ne[second_operand3-expected_result3] +FAILED test_composition.py::test_metrics_or[second_operand1-expected_result1] +FAILED test_composition.py::test_metrics_rfloordiv[5-expected_result0] - Valu... +FAILED test_composition.py::test_metrics_rfloordiv[5.0-expected_result1] - Va... +FAILED test_composition.py::test_metrics_rfloordiv[first_operand2-expected_result2] +FAILED test_composition.py::test_metrics_rmatmul[first_operand0-expected_result0] +FAILED test_composition.py::test_metrics_rmod[first_operand0-expected_result0] +FAILED test_composition.py::test_metrics_rpow[2-expected_result1] - ValueErro... +FAILED test_composition.py::test_metrics_rpow[2.0-expected_result2] - ValueEr... +FAILED test_composition.py::test_metrics_rsub[first_operand0-expected_result0] +FAILED test_composition.py::test_metrics_rsub[3-expected_result1] - Attribute... +FAILED test_composition.py::test_metrics_rsub[3.0-expected_result2] - Attribu... 
+FAILED test_composition.py::test_metrics_rsub[first_operand3-expected_result3] +FAILED test_composition.py::test_metrics_rtruediv[first_operand0-expected_result0] +FAILED test_composition.py::test_metrics_rtruediv[6-expected_result1] - Attri... +FAILED test_composition.py::test_metrics_rtruediv[6.0-expected_result2] - Att... +FAILED test_composition.py::test_metrics_rtruediv[first_operand3-expected_result3] +FAILED test_composition.py::test_metrics_sub[second_operand0-expected_result0] +FAILED test_composition.py::test_metrics_sub[2-expected_result1] - AttributeE... +FAILED test_composition.py::test_metrics_sub[2.0-expected_result2] - Attribut... +FAILED test_composition.py::test_metrics_sub[second_operand3-expected_result3] +FAILED test_composition.py::test_metrics_truediv[second_operand0-expected_result0] +FAILED test_composition.py::test_metrics_truediv[3-expected_result1] - Attrib... +FAILED test_composition.py::test_metrics_truediv[3.0-expected_result2] - Attr... +FAILED test_composition.py::test_metrics_truediv[second_operand3-expected_result3] +FAILED test_composition.py::test_metrics_xor[second_operand1-expected_result1] +FAILED test_composition.py::test_metrics_getitem[value0-1-expected_result0] +FAILED test_composition.py::test_metrics_getitem[value1-idx1-expected_result1] +================= 46 failed, 40 passed, 120 warnings in 2.37s ================== diff --git a/RE/paddlemetric/src/tests/bases/test_aggregation.py b/RE/paddlemetric/src/tests/bases/test_aggregation.py new file mode 100644 index 00000000..559ddad0 --- /dev/null +++ b/RE/paddlemetric/src/tests/bases/test_aggregation.py @@ -0,0 +1,166 @@ +import numpy as np +import pytest +import paddleext.torchapi as B + +from ..helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.aggregation import CatMetric, MaxMetric, MeanMetric, MinMetric, SumMetric + + +def compare_mean(values, weights): + """reference implementation for mean aggregation.""" + return np.average(values.numpy(), weights=weights) + + +def compare_sum(values, weights): + """reference implementation for sum aggregation.""" + return np.sum(values.numpy()) + + +def compare_min(values, weights): + """reference implementation for min aggregation.""" + return np.min(values.numpy()) + + +def compare_max(values, weights): + """reference implementation for max aggregation.""" + return np.max(values.numpy()) + + +# wrap all other than mean metric to take an additional argument +# this lets them fit into the testing framework +class WrappedMinMetric(MinMetric): + """Wrapped min metric.""" + + def update(self, values, weights): + """only pass values on.""" + super().update(values) + + +class WrappedMaxMetric(MaxMetric): + """Wrapped max metric.""" + + def update(self, values, weights): + """only pass values on.""" + super().update(values) + + +class WrappedSumMetric(SumMetric): + """Wrapped min metric.""" + + def update(self, values, weights): + """only pass values on.""" + super().update(values) + + +class WrappedCatMetric(CatMetric): + """Wrapped cat metric.""" + + def update(self, values, weights): + """only pass values on.""" + super().update(values) + + +@pytest.mark.parametrize( + "values, weights", + [ + (B.rand(NUM_BATCHES, BATCH_SIZE), B.ones(NUM_BATCHES, BATCH_SIZE)), + (B.rand(NUM_BATCHES, BATCH_SIZE), B.rand(NUM_BATCHES, BATCH_SIZE) > 0.5), + (B.rand(NUM_BATCHES, BATCH_SIZE, 2), B.rand(NUM_BATCHES, BATCH_SIZE, 2) > 0.5), + ], +) +@pytest.mark.parametrize( + "metric_class, compare_fn", + [ + (WrappedMinMetric, compare_min), + 
(WrappedMaxMetric, compare_max),
+        (WrappedSumMetric, compare_sum),
+        (MeanMetric, compare_mean),
+    ],
+)
+class TestAggregation(MetricTester):
+    """Test aggregation metrics."""
+
+    @pytest.mark.parametrize("ddp", [False])
+    @pytest.mark.parametrize("dist_sync_on_step", [False])
+    def test_aggregation(self, ddp, dist_sync_on_step, metric_class, compare_fn, values, weights):
+        """test modular implementation."""
+        self.run_class_metric_test(
+            ddp=ddp,
+            dist_sync_on_step=dist_sync_on_step,
+            metric_class=metric_class,
+            sk_metric=compare_fn,
+            check_scriptable=True,
+            # abuse of names here: values/weights are passed as preds/target
+            preds=values,
+            target=weights,
+        )
+
+
+_case1 = float("nan") * B.ones(5)
+_case2 = B.tensor([1.0, 2.0, float("nan"), 4.0, 5.0])
+
+
+@pytest.mark.parametrize("value", [_case1, _case2])
+@pytest.mark.parametrize("nan_strategy", ["error", "warn"])
+@pytest.mark.parametrize("metric_class", [MinMetric, MaxMetric, SumMetric, MeanMetric, CatMetric])
+def test_nan_error(value, nan_strategy, metric_class):
+    """test correct errors are raised."""
+    metric = metric_class(nan_strategy=nan_strategy)
+    if nan_strategy == "error":
+        with pytest.raises(RuntimeError, match="Encounted `nan` values in tensor"):
+            metric(value.clone())
+    elif nan_strategy == "warn":
+        with pytest.warns(UserWarning, match="Encounted `nan` values in tensor"):
+            metric(value.clone())
+
+
+@pytest.mark.parametrize(
+    "metric_class, nan_strategy, value, expected",
+    [
+        (MinMetric, "ignore", _case1, B.tensor(float("inf"))),
+        (MinMetric, 2.0, _case1, 2.0),
+        (MinMetric, "ignore", _case2, 1.0),
+        (MinMetric, 2.0, _case2, 1.0),
+        (MaxMetric, "ignore", _case1, -B.tensor(float("inf"))),
+        (MaxMetric, 2.0, _case1, 2.0),
+        (MaxMetric, "ignore", _case2, 5.0),
+        (MaxMetric, 2.0, _case2, 5.0),
+        (SumMetric, "ignore", _case1, 0.0),
+        (SumMetric, 2.0, _case1, 10.0),
+        (SumMetric, "ignore", _case2, 12.0),
+        (SumMetric, 2.0, _case2, 14.0),
+        (MeanMetric, "ignore", _case1, 0.0),
+        (MeanMetric, 2.0, _case1, 2.0),
+        (MeanMetric, "ignore", _case2, 3.0),
+        (MeanMetric, 2.0, _case2, 2.8),
+        (CatMetric, "ignore", _case1, []),
+        (CatMetric, 2.0, _case1, B.tensor([2.0, 2.0, 2.0, 2.0, 2.0])),
+        (CatMetric, "ignore", _case2, B.tensor([1.0, 2.0, 4.0, 5.0])),
+        (CatMetric, 2.0, _case2, B.tensor([1.0, 2.0, 2.0, 4.0, 5.0])),
+    ],
+)
+def test_nan_expected(metric_class, nan_strategy, value, expected):
+    """test that nan values are handled correctly."""
+    metric = metric_class(nan_strategy=nan_strategy)
+    metric.update(value.clone())
+    out = metric.compute()
+    assert np.allclose(out, expected, equal_nan=True)
+
+
+@pytest.mark.parametrize("metric_class", [MinMetric, MaxMetric, SumMetric, MeanMetric, CatMetric])
+def test_error_on_wrong_nan_strategy(metric_class):
+    """test error raised on wrong nan_strategy argument."""
+    with pytest.raises(ValueError, match="Arg `nan_strategy` should either .*"):
+        metric_class(nan_strategy=[])
+
+
+# @pytest.mark.skipif(not hasattr(torch, "broadcast_to"), reason="PyTorch <1.8 does not have broadcast_to")
+@pytest.mark.parametrize(
+    "weights, expected", [(1, 11.5), (B.ones(2, 1, 1), 11.5), (B.tensor([1, 2]).reshape(2, 1, 1), 13.5)]
+)
+def test_mean_metric_broadcasting(weights, expected):
+    """check that weight broadcasting works for mean metric."""
+    values = B.arange(24).reshape(2, 3, 4)
+    avg = MeanMetric()
+
+    assert avg(values, weights) == expected
diff --git a/RE/paddlemetric/src/tests/bases/test_collections.py b/RE/paddlemetric/src/tests/bases/test_collections.py
new file mode 100644
index 00000000..d92234f8
--- /dev/null
+++
b/RE/paddlemetric/src/tests/bases/test_collections.py @@ -0,0 +1,251 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pickle + +import pytest +import paddleext.torchapi as B + +from ..helpers import seed_all +from ..helpers.testers import DummyMetricDiff, DummyMetricSum +from paddlemetrics.collections import MetricCollection + +seed_all(42) + + +def test_metric_collection(tmpdir): + m1 = DummyMetricSum() + m2 = DummyMetricDiff() + + metric_collection = MetricCollection([m1, m2]) + + # Test correct dict structure + assert len(metric_collection) == 2 + assert metric_collection["DummyMetricSum"] == m1 + assert metric_collection["DummyMetricDiff"] == m2 + + # Test correct initialization + for name, metric in metric_collection.items(): + assert metric.x == 0, f"Metric {name} not initialized correctly" + + # Test every metric gets updated + metric_collection.update(5) + for name, metric in metric_collection.items(): + assert metric.x.abs() == 5, f"Metric {name} not updated correctly" + + # Test compute on each metric + metric_collection.update(-5) + metric_vals = metric_collection.compute() + assert len(metric_vals) == 2 + for name, metric_val in metric_vals.items(): + assert metric_val == 0, f"Metric {name}.compute not called correctly" + + # Test that everything is reset + for name, metric in metric_collection.items(): + assert metric.x == 0, f"Metric {name} not reset correctly" + + # Test pickable + metric_pickled = pickle.dumps(metric_collection) + metric_loaded = pickle.loads(metric_pickled) + assert isinstance(metric_loaded, MetricCollection) + + +@pytest.mark.skipif(not B.cuda.is_available(), reason="Test requires GPU.") +def test_device_and_dtype_transfer_metriccollection(tmpdir): + m1 = DummyMetricSum() + m2 = DummyMetricDiff() + + metric_collection = MetricCollection([m1, m2]) + for _, metric in metric_collection.items(): + assert metric.x.is_cuda is False + assert metric.x.dtype == B.float32 + + metric_collection = metric_collection.to(device="cuda") + for _, metric in metric_collection.items(): + assert metric.x.is_cuda + + metric_collection = metric_collection.double() + for _, metric in metric_collection.items(): + assert metric.x.dtype == B.float64 + + metric_collection = metric_collection.half() + for _, metric in metric_collection.items(): + assert metric.x.dtype == B.float16 + + +def test_metric_collection_wrong_input(tmpdir): + """Check that errors are raised on wrong input.""" + dms = DummyMetricSum() + + # Not all input are metrics (list) + with pytest.raises(ValueError): + _ = MetricCollection([dms, 5]) + + # Not all input are metrics (dict) + with pytest.raises(ValueError): + _ = MetricCollection({"metric1": dms, "metric2": 5}) + + # Same metric passed in multiple times + with pytest.raises(ValueError, match="Encountered two metrics both named *."): + _ = MetricCollection([dms, dms]) + + # Not a list or dict passed in + with pytest.warns(Warning, match=" which are not `Metric` so they will be ignored."): + _ = 
MetricCollection(dms, [dms]) + + +def test_metric_collection_args_kwargs(tmpdir): + """Check that args and kwargs gets passed correctly in metric collection, Checks both update and forward + method.""" + m1 = DummyMetricSum() + m2 = DummyMetricDiff() + + metric_collection = MetricCollection([m1, m2]) + + # args gets passed to all metrics + metric_collection.update(5) + assert metric_collection["DummyMetricSum"].x == 5 + assert metric_collection["DummyMetricDiff"].x == -5 + metric_collection.reset() + _ = metric_collection(5) + assert metric_collection["DummyMetricSum"].x == 5 + assert metric_collection["DummyMetricDiff"].x == -5 + metric_collection.reset() + + # kwargs gets only passed to metrics that it matches + metric_collection.update(x=10, y=20) + assert metric_collection["DummyMetricSum"].x == 10 + assert metric_collection["DummyMetricDiff"].x == -20 + metric_collection.reset() + _ = metric_collection(x=10, y=20) + assert metric_collection["DummyMetricSum"].x == 10 + assert metric_collection["DummyMetricDiff"].x == -20 + + +@pytest.mark.parametrize( + "prefix, postfix", + [ + [None, None], + ["prefix_", None], + [None, "_postfix"], + ["prefix_", "_postfix"], + ], +) +def test_metric_collection_prefix_postfix_args(prefix, postfix): + """Test that the prefix arg alters the keywords in the output.""" + m1 = DummyMetricSum() + m2 = DummyMetricDiff() + names = ["DummyMetricSum", "DummyMetricDiff"] + names = [prefix + n if prefix is not None else n for n in names] + names = [n + postfix if postfix is not None else n for n in names] + + metric_collection = MetricCollection([m1, m2], prefix=prefix, postfix=postfix) + + # test forward + out = metric_collection(5) + for name in names: + assert name in out, "prefix or postfix argument not working as intended with forward method" + + # test compute + out = metric_collection.compute() + for name in names: + assert name in out, "prefix or postfix argument not working as intended with compute method" + + # test clone + new_metric_collection = metric_collection.clone(prefix="new_prefix_") + out = new_metric_collection(5) + names = [n[len(prefix) :] if prefix is not None else n for n in names] # strip away old prefix + for name in names: + assert f"new_prefix_{name}" in out, "prefix argument not working as intended with clone method" + + for k, _ in new_metric_collection.items(): + assert "new_prefix_" in k + + for k in new_metric_collection.keys(): + assert "new_prefix_" in k + + for k, _ in new_metric_collection.items(keep_base=True): + assert "new_prefix_" not in k + + for k in new_metric_collection.keys(keep_base=True): + assert "new_prefix_" not in k + + assert isinstance(new_metric_collection.keys(keep_base=True), type(new_metric_collection.keys(keep_base=False))) + assert isinstance(new_metric_collection.items(keep_base=True), type(new_metric_collection.items(keep_base=False))) + + new_metric_collection = new_metric_collection.clone(postfix="_new_postfix") + out = new_metric_collection(5) + names = [n[: -len(postfix)] if postfix is not None else n for n in names] # strip away old postfix + for name in names: + assert f"new_prefix_{name}_new_postfix" in out, "postfix argument not working as intended with clone method" + + +def test_metric_collection_repr(): + """Test MetricCollection.""" + + class A(DummyMetricSum): + pass + + class B(DummyMetricDiff): + pass + + m1 = A() + m2 = B() + metric_collection = MetricCollection([m1, m2], prefix=None, postfix=None) + + expected = "MetricCollection(\n (A): A()\n (B): B()\n)" + assert 
metric_collection.__repr__() == expected + + metric_collection = MetricCollection([m1, m2], prefix="a", postfix=None) + + expected = "MetricCollection(\n (A): A()\n (B): B(),\n prefix=a\n)" + assert metric_collection.__repr__() == expected + + metric_collection = MetricCollection([m1, m2], prefix=None, postfix="a") + expected = "MetricCollection(\n (A): A()\n (B): B(),\n postfix=a\n)" + assert metric_collection.__repr__() == expected + + metric_collection = MetricCollection([m1, m2], prefix="a", postfix="b") + expected = "MetricCollection(\n (A): A()\n (B): B(),\n prefix=a,\n postfix=b\n)" + assert metric_collection.__repr__() == expected + + +def test_metric_collection_same_order(): + m1 = DummyMetricSum() + m2 = DummyMetricDiff() + col1 = MetricCollection({"a": m1, "b": m2}) + col2 = MetricCollection({"b": m2, "a": m1}) + for k1, k2 in zip(col1.keys(), col2.keys()): + assert k1 == k2 + + +def test_collection_add_metrics(): + m1 = DummyMetricSum() + m2 = DummyMetricDiff() + + collection = MetricCollection([m1]) + collection.add_metrics({"m1_": DummyMetricSum()}) + collection.add_metrics(m2) + + collection.update(5) + results = collection.compute() + assert results["DummyMetricSum"] == results["m1_"] and results["m1_"] == 5 + assert results["DummyMetricDiff"] == -5 + + +def test_collection_check_arg(): + assert MetricCollection._check_arg(None, "prefix") is None + assert MetricCollection._check_arg("sample", "prefix") == "sample" + + with pytest.raises(ValueError, match="Expected input `postfix` to be a string, but got"): + MetricCollection._check_arg(1, "postfix") diff --git a/RE/paddlemetric/src/tests/bases/test_composition.py b/RE/paddlemetric/src/tests/bases/test_composition.py new file mode 100644 index 00000000..0c9e6a08 --- /dev/null +++ b/RE/paddlemetric/src/tests/bases/test_composition.py @@ -0,0 +1,559 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from operator import neg, pos + +import pytest +import paddleext.torchapi as B +from paddleext.torchapi import tensor + +from ..helpers import _MARK_TORCH_MIN_1_4, _MARK_TORCH_MIN_1_5, _MARK_TORCH_MIN_1_6 +from paddlemetrics.metric import CompositionalMetric, Metric + + +class DummyMetric(Metric): + def __init__(self, val_to_return): + super().__init__() + self._num_updates = 0 + self._val_to_return = val_to_return + self._update_called = True + + def update(self, *args, **kwargs) -> None: + self._num_updates += 1 + + def compute(self): + return tensor(self._val_to_return) + + def reset(self): + self._num_updates = 0 + return super().reset() + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(4)), + (2, tensor(4)), + (2.0, tensor(4.0)), + pytest.param(tensor(2), tensor(4), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], +) +def test_metrics_add(second_operand, expected_result): + first_metric = DummyMetric(2) + + final_add = first_metric + second_operand + final_radd = second_operand + first_metric + + assert isinstance(final_add, CompositionalMetric) + assert isinstance(final_radd, CompositionalMetric) + + final_add.update() + final_radd.update() + + assert B.allclose(expected_result, final_add.compute()) + assert B.allclose(expected_result, final_radd.compute()) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric(3), tensor(2)), (3, tensor(2)), (3, tensor(2)), (tensor(3), tensor(2))], +) +@pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) +def test_metrics_and(second_operand, expected_result): + first_metric = DummyMetric(2) + + final_and = first_metric & second_operand + final_rand = second_operand & first_metric + + assert isinstance(final_and, CompositionalMetric) + assert isinstance(final_rand, CompositionalMetric) + + final_and.update() + final_rand.update() + assert B.allclose(expected_result, final_and.compute()) + assert B.allclose(expected_result, final_rand.compute()) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(True)), + (2, tensor(True)), + (2.0, tensor(True)), + (tensor(2), tensor(True)), + ], +) +def test_metrics_eq(second_operand, expected_result): + first_metric = DummyMetric(2) + + final_eq = first_metric == second_operand + + assert isinstance(final_eq, CompositionalMetric) + + final_eq.update() + # can't use allclose for bool tensors + assert (expected_result == final_eq.compute()).all() + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(2)), + (2, tensor(2)), + (2.0, tensor(2.0)), + (tensor(2), tensor(2)), + ], +) +@pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) +def test_metrics_floordiv(second_operand, expected_result): + first_metric = DummyMetric(5) + + final_floordiv = first_metric // second_operand + + assert isinstance(final_floordiv, CompositionalMetric) + + final_floordiv.update() + assert B.allclose(expected_result, final_floordiv.compute()) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(True)), + (2, tensor(True)), + (2.0, tensor(True)), + (tensor(2), tensor(True)), + ], +) +def test_metrics_ge(second_operand, expected_result): + first_metric = DummyMetric(5) + + final_ge = first_metric >= second_operand + + assert isinstance(final_ge, CompositionalMetric) + + final_ge.update() + # can't use allclose for bool tensors + assert (expected_result == final_ge.compute()).all() + + +@pytest.mark.parametrize( + 
["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(True)), + (2, tensor(True)), + (2.0, tensor(True)), + (tensor(2), tensor(True)), + ], +) +def test_metrics_gt(second_operand, expected_result): + first_metric = DummyMetric(5) + + final_gt = first_metric > second_operand + + assert isinstance(final_gt, CompositionalMetric) + + final_gt.update() + # can't use allclose for bool tensors + assert (expected_result == final_gt.compute()).all() + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(False)), + (2, tensor(False)), + (2.0, tensor(False)), + (tensor(2), tensor(False)), + ], +) +def test_metrics_le(second_operand, expected_result): + first_metric = DummyMetric(5) + + final_le = first_metric <= second_operand + + assert isinstance(final_le, CompositionalMetric) + + final_le.update() + # can't use allclose for bool tensors + assert (expected_result == final_le.compute()).all() + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(False)), + (2, tensor(False)), + (2.0, tensor(False)), + (tensor(2), tensor(False)), + ], +) +def test_metrics_lt(second_operand, expected_result): + first_metric = DummyMetric(5) + + final_lt = first_metric < second_operand + + assert isinstance(final_lt, CompositionalMetric) + + final_lt.update() + # can't use allclose for bool tensors + assert (expected_result == final_lt.compute()).all() + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric([2, 2, 2]), tensor(12)), (tensor([2, 2, 2]), tensor(12))], +) +def test_metrics_matmul(second_operand, expected_result): + first_metric = DummyMetric([2, 2, 2]) + + final_matmul = first_metric @ second_operand + + assert isinstance(final_matmul, CompositionalMetric) + + final_matmul.update() + assert B.allclose(expected_result, final_matmul.compute()) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1)), + (tensor(2), tensor(1)), + ], +) +def test_metrics_mod(second_operand, expected_result): + first_metric = DummyMetric(5) + + final_mod = first_metric % second_operand + + assert isinstance(final_mod, CompositionalMetric) + + final_mod.update() + # prevent Runtime error for PT 1.8 - Long did not match Float + assert B.allclose(expected_result.to(float), final_mod.compute().to(float)) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(4)), + (2, tensor(4)), + (2.0, tensor(4.0)), + pytest.param(tensor(2), tensor(4), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], +) +def test_metrics_mul(second_operand, expected_result): + first_metric = DummyMetric(2) + + final_mul = first_metric * second_operand + final_rmul = second_operand * first_metric + + assert isinstance(final_mul, CompositionalMetric) + assert isinstance(final_rmul, CompositionalMetric) + + final_mul.update() + final_rmul.update() + assert B.allclose(expected_result, final_mul.compute()) + assert B.allclose(expected_result, final_rmul.compute()) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(False)), + (2, tensor(False)), + (2.0, tensor(False)), + (tensor(2), tensor(False)), + ], +) +def test_metrics_ne(second_operand, expected_result): + first_metric = DummyMetric(2) + + final_ne = first_metric != second_operand + + assert isinstance(final_ne, CompositionalMetric) + + final_ne.update() + # can't use allclose for bool 
tensors + assert (expected_result == final_ne.compute()).all() + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric([1, 0, 3]), tensor([-1, -2, 3])), (tensor([1, 0, 3]), tensor([-1, -2, 3]))], +) +@pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) +def test_metrics_or(second_operand, expected_result): + first_metric = DummyMetric([-1, -2, 3]) + + final_or = first_metric | second_operand + final_ror = second_operand | first_metric + + assert isinstance(final_or, CompositionalMetric) + assert isinstance(final_ror, CompositionalMetric) + + final_or.update() + final_ror.update() + assert B.allclose(expected_result, final_or.compute()) + assert B.allclose(expected_result, final_ror.compute()) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + pytest.param(DummyMetric(2), tensor(4)), + pytest.param(2, tensor(4)), + pytest.param(2.0, tensor(4.0), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_6)), + pytest.param(tensor(2), tensor(4)), + ], +) +def test_metrics_pow(second_operand, expected_result): + first_metric = DummyMetric(2) + + final_pow = first_metric ** second_operand + + assert isinstance(final_pow, CompositionalMetric) + + final_pow.update() + assert B.allclose(expected_result, final_pow.compute()) + + +@pytest.mark.parametrize( + ["first_operand", "expected_result"], + [(5, tensor(2)), (5.0, tensor(2.0)), (tensor(5), tensor(2))], +) +@pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) +def test_metrics_rfloordiv(first_operand, expected_result): + second_operand = DummyMetric(2) + + final_rfloordiv = first_operand // second_operand + + assert isinstance(final_rfloordiv, CompositionalMetric) + + final_rfloordiv.update() + assert B.allclose(expected_result, final_rfloordiv.compute()) + + +@pytest.mark.parametrize( + ["first_operand", "expected_result"], + [pytest.param(tensor([2, 2, 2]), tensor(12), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4))], +) +def test_metrics_rmatmul(first_operand, expected_result): + second_operand = DummyMetric([2, 2, 2]) + + final_rmatmul = first_operand @ second_operand + + assert isinstance(final_rmatmul, CompositionalMetric) + + final_rmatmul.update() + assert B.allclose(expected_result, final_rmatmul.compute()) + + +@pytest.mark.parametrize( + ["first_operand", "expected_result"], + [pytest.param(tensor(2), tensor(2), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4))], +) +def test_metrics_rmod(first_operand, expected_result): + second_operand = DummyMetric(5) + + final_rmod = first_operand % second_operand + + assert isinstance(final_rmod, CompositionalMetric) + + final_rmod.update() + assert B.allclose(expected_result, final_rmod.compute()) + + +@pytest.mark.parametrize( + "first_operand,expected_result", + [ + pytest.param(DummyMetric(2), tensor(4)), + pytest.param(2, tensor(4)), + pytest.param(2.0, tensor(4.0), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_6)), + ], +) +def test_metrics_rpow(first_operand, expected_result): + second_operand = DummyMetric(2) + + final_rpow = first_operand ** second_operand + + assert isinstance(final_rpow, CompositionalMetric) + final_rpow.update() + assert B.allclose(expected_result, final_rpow.compute()) + + +@pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(3), tensor(1)), + (3, tensor(1)), + (3.0, tensor(1.0)), + pytest.param(tensor(3), tensor(1), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], +) +def test_metrics_rsub(first_operand, expected_result): + second_operand = DummyMetric(2) + + final_rsub = first_operand - second_operand 
+ + assert isinstance(final_rsub, CompositionalMetric) + final_rsub.update() + assert B.allclose(expected_result, final_rsub.compute()) + + +@pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(6), tensor(2.0)), + (6, tensor(2.0)), + (6.0, tensor(2.0)), + (tensor(6), tensor(2.0)), + ], +) +@pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) +def test_metrics_rtruediv(first_operand, expected_result): + second_operand = DummyMetric(3) + + final_rtruediv = first_operand / second_operand + + assert isinstance(final_rtruediv, CompositionalMetric) + final_rtruediv.update() + assert B.allclose(expected_result, final_rtruediv.compute()) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1.0)), + (tensor(2), tensor(1)), + ], +) +def test_metrics_sub(second_operand, expected_result): + first_metric = DummyMetric(3) + + final_sub = first_metric - second_operand + + assert isinstance(final_sub, CompositionalMetric) + final_sub.update() + assert B.allclose(expected_result, final_sub.compute()) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(3), tensor(2.0)), + (3, tensor(2.0)), + (3.0, tensor(2.0)), + (tensor(3), tensor(2.0)), + ], +) +@pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) +def test_metrics_truediv(second_operand, expected_result): + first_metric = DummyMetric(6) + + final_truediv = first_metric / second_operand + + assert isinstance(final_truediv, CompositionalMetric) + final_truediv.update() + assert B.allclose(expected_result, final_truediv.compute()) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric([1, 0, 3]), tensor([-2, -2, 0])), (tensor([1, 0, 3]), tensor([-2, -2, 0]))], +) +@pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) +def test_metrics_xor(second_operand, expected_result): + first_metric = DummyMetric([-1, -2, 3]) + + final_xor = first_metric ^ second_operand + final_rxor = second_operand ^ first_metric + + assert isinstance(final_xor, CompositionalMetric) + assert isinstance(final_rxor, CompositionalMetric) + + final_xor.update() + final_rxor.update() + assert B.allclose(expected_result, final_xor.compute()) + assert B.allclose(expected_result, final_rxor.compute()) + + +def test_metrics_abs(): + first_metric = DummyMetric(-1) + + final_abs = abs(first_metric) + + assert isinstance(final_abs, CompositionalMetric) + final_abs.update() + assert B.allclose(tensor(1), final_abs.compute()) + + +def test_metrics_invert(): + first_metric = DummyMetric(1) + + final_inverse = ~first_metric + assert isinstance(final_inverse, CompositionalMetric) + final_inverse.update() + assert B.allclose(tensor(-2), final_inverse.compute()) + + +def test_metrics_neg(): + first_metric = DummyMetric(1) + + final_neg = neg(first_metric) + assert isinstance(final_neg, CompositionalMetric) + final_neg.update() + assert B.allclose(tensor(-1), final_neg.compute()) + + +def test_metrics_pos(): + first_metric = DummyMetric(-1) + + final_pos = pos(first_metric) + assert isinstance(final_pos, CompositionalMetric) + final_pos.update() + assert B.allclose(tensor(1), final_pos.compute()) + + +@pytest.mark.parametrize( + ["value", "idx", "expected_result"], + [([1, 2, 3], 1, tensor(2)), ([[0, 1], [2, 3]], (1, 0), tensor(2)), ([[0, 1], [2, 3]], 1, tensor([2, 3]))], +) +def test_metrics_getitem(value, idx, expected_result): + first_metric = DummyMetric(value) + + final_getitem = first_metric[idx] + assert isinstance(final_getitem, 
CompositionalMetric) + final_getitem.update() + assert B.allclose(expected_result, final_getitem.compute()) + + +def test_compositional_metrics_update(): + + compos = DummyMetric(5) + DummyMetric(4) + + assert isinstance(compos, CompositionalMetric) + compos.update() + compos.update() + compos.update() + + assert isinstance(compos.metric_a, DummyMetric) + assert isinstance(compos.metric_b, DummyMetric) + + assert compos.metric_a._num_updates == 3 + assert compos.metric_b._num_updates == 3 diff --git a/RE/paddlemetric/src/tests/bases/test_ddp.py b/RE/paddlemetric/src/tests/bases/test_ddp.py new file mode 100644 index 00000000..7f713e1d --- /dev/null +++ b/RE/paddlemetric/src/tests/bases/test_ddp.py @@ -0,0 +1,241 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import sys +from copy import deepcopy + +import pytest +import paddleext.torchapi as B +from paddleext.torchapi import tensor + +from tests.helpers import seed_all +from tests.helpers.testers import DummyMetric, DummyMetricSum, setup_ddp +from paddlemetrics import Metric +from paddlemetrics.utilities.distributed import gather_all_tensors +from paddlemetrics.utilities.exceptions import paddlemetricsUserError + +seed_all(42) + + +def _test_ddp_sum(rank, worldsize): + setup_ddp(rank, worldsize) + dummy = DummyMetric() + dummy._reductions = {"foo": B.sum} + dummy.foo = tensor(1) + dummy._sync_dist() + + assert dummy.foo == worldsize + + +def _test_ddp_cat(rank, worldsize): + setup_ddp(rank, worldsize) + dummy = DummyMetric() + dummy._reductions = {"foo": B.cat} + dummy.foo = [tensor([1])] + dummy._sync_dist() + + assert B.all(B.eq(dummy.foo, tensor([1, 1]))) + + +def _test_ddp_sum_cat(rank, worldsize): + setup_ddp(rank, worldsize) + dummy = DummyMetric() + dummy._reductions = {"foo": B.cat, "bar": B.sum} + dummy.foo = [tensor([1])] + dummy.bar = tensor(1) + dummy._sync_dist() + + assert B.all(B.eq(dummy.foo, tensor([1, 1]))) + assert dummy.bar == worldsize + + +def _test_ddp_gather_uneven_tensors(rank, worldsize): + setup_ddp(rank, worldsize) + tensor = B.ones(rank) + result = gather_all_tensors(tensor) + assert len(result) == worldsize + for idx in range(worldsize): + assert len(result[idx]) == idx + assert (result[idx] == B.ones_like(result[idx])).all() + + +def _test_ddp_gather_uneven_tensors_multidim(rank, worldsize): + setup_ddp(rank, worldsize) + tensor = B.ones(rank + 1, 2 - rank) + result = gather_all_tensors(tensor) + assert len(result) == worldsize + for idx in range(worldsize): + val = result[idx] + assert val.shape == (idx + 1, 2 - idx) + assert (val == B.ones_like(val)).all() + + +def _test_ddp_compositional_tensor(rank, worldsize): + setup_ddp(rank, worldsize) + dummy = DummyMetricSum() + dummy._reductions = {"x": B.sum} + dummy = dummy.clone() + dummy.clone() + dummy.update(tensor(1)) + val = dummy.compute() + assert val == 2 * worldsize + + +@pytest.mark.skipif(sys.platform == "win32", reason="DDP not available on windows") +@pytest.mark.parametrize( + "process", + [ + 
_test_ddp_cat, + _test_ddp_sum, + _test_ddp_sum_cat, + _test_ddp_gather_uneven_tensors, + _test_ddp_gather_uneven_tensors_multidim, + _test_ddp_compositional_tensor, + ], +) +def test_ddp(process): + B.multiprocessing.spawn(process, args=(2,), nprocs=2) + + +def _test_non_contiguous_tensors(rank, worldsize): + setup_ddp(rank, worldsize) + + class DummyCatMetric(Metric): + def __init__(self): + super().__init__() + self.add_state("x", default=[], dist_reduce_fx=None) + + def update(self, x): + self.x.append(x) + + def compute(self): + x = B.cat(self.x, dim=0) + return x.sum() + + metric = DummyCatMetric() + metric.update(B.randn(10, 5)[:, 0]) + + +@pytest.mark.skipif(sys.platform == "win32", reason="DDP not available on windows") +def test_non_contiguous_tensors(): + """Test that gather_all operation works for non contiguous tensors.""" + B.multiprocessing.spawn(_test_non_contiguous_tensors, args=(2,), nprocs=2) + + +def _test_state_dict_is_synced(rank, worldsize, tmpdir): + setup_ddp(rank, worldsize) + + class DummyCatMetric(Metric): + def __init__(self): + super().__init__() + self.add_state("x", B.tensor(0), dist_reduce_fx=B.sum) + self.add_state("c", B.tensor(0), dist_reduce_fx=B.sum) + + def update(self, x): + self.x += x + self.c += 1 + + def compute(self): + return self.x // self.c + + def __repr__(self): + return f"DummyCatMetric(x={self.x}, c={self.c})" + + metric = DummyCatMetric() + metric.persistent(True) + + def verify_metric(metric, i, world_size): + state_dict = metric.state_dict() + exp_sum = i * (i + 1) / 2 + assert state_dict["x"] == exp_sum * world_size + assert metric.x == exp_sum * world_size + assert metric.c == (i + 1) * world_size + assert state_dict["c"] == metric.c + + steps = 5 + for i in range(steps): + + if metric._is_synced: + + with pytest.raises(paddlemetricsUserError, match="The Metric shouldn't be synced when performing"): + metric(i) + + metric.unsync() + + metric(i) + + verify_metric(metric, i, 1) + + metric.sync() + assert metric._is_synced + + with pytest.raises(paddlemetricsUserError, match="The Metric has already been synced."): + metric.sync() + + verify_metric(metric, i, 2) + + metric.unsync() + assert not metric._is_synced + + with pytest.raises(paddlemetricsUserError, match="The Metric has already been un-synced."): + metric.unsync() + + with metric.sync_context(): + assert metric._is_synced + verify_metric(metric, i, 2) + + with metric.sync_context(should_unsync=False): + assert metric._is_synced + verify_metric(metric, i, 2) + + assert metric._is_synced + + metric.unsync() + assert not metric._is_synced + + metric.sync() + cache = metric._cache + metric._cache = None + + with pytest.raises(paddlemetricsUserError, match="The internal cache should exist to unsync the Metric."): + metric.unsync() + + metric._cache = cache + + def reload_state_dict(state_dict, expected_x, expected_c): + metric = DummyCatMetric() + metric.load_state_dict(state_dict) + assert metric.x == expected_x + assert metric.c == expected_c + + reload_state_dict(deepcopy(metric.state_dict()), 20, 10) + + metric.unsync() + reload_state_dict(deepcopy(metric.state_dict()), 10, 5) + + metric.sync() + + filepath = os.path.join(tmpdir, f"weights-{rank}.pt") + + B.save(metric.state_dict(), filepath) + + metric.unsync() + with metric.sync_context(): + B.save(metric.state_dict(), filepath) + + +@pytest.mark.skipif(sys.platform == "win32", reason="DDP not available on windows") +def test_state_dict_is_synced(tmpdir): + """This test asserts that metrics are synced while creating the state 
dict but restored after to continue
+    accumulation."""
+    B.multiprocessing.spawn(_test_state_dict_is_synced, args=(2, tmpdir), nprocs=2)
diff --git a/RE/paddlemetric/src/tests/bases/test_hashing.py b/RE/paddlemetric/src/tests/bases/test_hashing.py
new file mode 100644
index 00000000..af77dbd1
--- /dev/null
+++ b/RE/paddlemetric/src/tests/bases/test_hashing.py
@@ -0,0 +1,22 @@
+import pytest
+
+from tests.helpers.testers import DummyListMetric, DummyMetric
+
+
+@pytest.mark.parametrize(
+    "metric_cls",
+    [
+        DummyMetric,
+        DummyListMetric,
+    ],
+)
+def test_metric_hashing(metric_cls):
+    """Tests that hashes are different.
+
+    See the Metric's hash function for details on why this is required.
+    """
+    instance_1 = metric_cls()
+    instance_2 = metric_cls()
+
+    assert hash(instance_1) != hash(instance_2)
+    assert id(instance_1) != id(instance_2)
diff --git a/RE/paddlemetric/src/tests/bases/test_metric.py b/RE/paddlemetric/src/tests/bases/test_metric.py
new file mode 100644
index 00000000..a57eeb80
--- /dev/null
+++ b/RE/paddlemetric/src/tests/bases/test_metric.py
@@ -0,0 +1,356 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import pickle
+from collections import OrderedDict
+
+import cloudpickle
+import numpy as np
+import pytest
+import pangu.core.backend as torch
+from pangu.core.backend import Tensor, nn, tensor
+
+from tests.helpers import _LIGHTNING_GREATER_EQUAL_1_3, seed_all
+from tests.helpers.testers import DummyListMetric, DummyMetric, DummyMetricMultiOutput, DummyMetricSum
+from paddlemetrics.utilities.imports import _LIGHTNING_AVAILABLE, _TORCH_LOWER_1_6
+
+seed_all(42)
+
+
+def test_inherit():
+    DummyMetric()
+
+
+def test_add_state():
+    a = DummyMetric()
+
+    a.add_state("a", tensor(0), "sum")
+    assert a._reductions["a"](tensor([1, 1])) == 2
+
+    a.add_state("b", tensor(0), "mean")
+    assert np.allclose(a._reductions["b"](tensor([1.0, 2.0])).numpy(), 1.5)
+
+    a.add_state("c", tensor(0), "cat")
+    assert a._reductions["c"]([tensor([1]), tensor([1])]).shape == (2,)
+
+    with pytest.raises(ValueError):
+        a.add_state("d1", tensor(0), "xyz")
+
+    with pytest.raises(ValueError):
+        a.add_state("d2", tensor(0), 42)
+
+    with pytest.raises(ValueError):
+        a.add_state("d3", [tensor(0)], "sum")
+
+    with pytest.raises(ValueError):
+        a.add_state("d4", 42, "sum")
+
+    def custom_fx(_):
+        return -1
+
+    a.add_state("e", tensor(0), custom_fx)
+    assert a._reductions["e"](tensor([1, 1])) == -1
+
+
+def test_add_state_persistent():
+    a = DummyMetric()
+
+    a.add_state("a", tensor(0), "sum", persistent=True)
+    assert "a" in a.state_dict()
+
+    a.add_state("b", tensor(0), "sum", persistent=False)
+
+    if _TORCH_LOWER_1_6:
+        assert "b" not in a.state_dict()
+
+
+def test_reset():
+    class A(DummyMetric):
+        pass
+
+    class B(DummyListMetric):
+        pass
+
+    a = A()
+    assert a.x == 0
+    a.x = tensor(5)
+    a.reset()
+    assert a.x == 0
+
+    b = B()
+    assert isinstance(b.x, list) and len(b.x) == 0
+    b.x = tensor(5)
+    b.reset()
+    assert isinstance(b.x, list) and len(b.x) == 0
+
+
+def
test_reset_compute(): + a = DummyMetricSum() + assert a.x == 0 + a.update(tensor(5)) + assert a.compute() == 5 + a.reset() + if not _LIGHTNING_AVAILABLE or _LIGHTNING_GREATER_EQUAL_1_3: + assert a.compute() == 0 + else: + assert a.compute() == 5 + + +def test_update(): + class A(DummyMetric): + def update(self, x): + self.x += x + + a = A() + assert a.x == 0 + assert a._computed is None + a.update(1) + assert a._computed is None + assert a.x == 1 + a.update(2) + assert a.x == 3 + assert a._computed is None + + +def test_compute(): + class A(DummyMetric): + def update(self, x): + self.x += x + + def compute(self): + return self.x + + a = A() + assert 0 == a.compute() + assert 0 == a.x + a.update(1) + assert a._computed is None + assert a.compute() == 1 + assert a._computed == 1 + a.update(2) + assert a._computed is None + assert a.compute() == 3 + assert a._computed == 3 + + # called without update, should return cached value + a._computed = 5 + assert a.compute() == 5 + + +def test_hash(): + class A(DummyMetric): + pass + + class B(DummyListMetric): + pass + + a1 = A() + a2 = A() + assert hash(a1) != hash(a2) + + b1 = B() + b2 = B() + assert hash(b1) != hash(b2) # different ids + assert isinstance(b1.x, list) and len(b1.x) == 0 + b1.x.append(tensor(5)) + assert isinstance(hash(b1), int) # <- check that nothing crashes + assert isinstance(b1.x, list) and len(b1.x) == 1 + b2.x.append(tensor(5)) + # Sanity: + assert isinstance(b2.x, list) and len(b2.x) == 1 + # Now that they have tensor contents, they should have different hashes: + assert hash(b1) != hash(b2) + + +def test_forward(): + class A(DummyMetric): + def update(self, x): + self.x += x + + def compute(self): + return self.x + + a = A() + assert a(5) == 5 + assert a._forward_cache == 5 + + assert a(8) == 8 + assert a._forward_cache == 8 + + assert a.compute() == 13 + + +def test_pickle(tmpdir): + # doesn't tests for DDP + a = DummyMetricSum() + a.update(1) + + metric_pickled = pickle.dumps(a) + metric_loaded = pickle.loads(metric_pickled) + + assert metric_loaded.compute() == 1 + + metric_loaded.update(5) + assert metric_loaded.compute() == 6 + + metric_pickled = cloudpickle.dumps(a) + metric_loaded = cloudpickle.loads(metric_pickled) + + assert metric_loaded.compute() == 1 + + +def test_state_dict(tmpdir): + """test that metric states can be removed and added to state dict.""" + metric = DummyMetric() + assert metric.state_dict() == OrderedDict() + metric.persistent(True) + assert metric.state_dict() == OrderedDict(x=0) + metric.persistent(False) + assert metric.state_dict() == OrderedDict() + + +def test_load_state_dict(tmpdir): + """test that metric states can be loaded with state dict.""" + metric = DummyMetricSum() + metric.persistent(True) + metric.update(5) + loaded_metric = DummyMetricSum() + loaded_metric.load_state_dict(metric.state_dict()) + assert metric.compute() == 5 + + +def test_child_metric_state_dict(): + """test that child metric states will be added to parent state dict.""" + + class TestModule(nn.Module): + def __init__(self): + super().__init__() + self.metric = DummyMetric() + self.metric.add_state("a", tensor(0), persistent=True) + self.metric.add_state("b", [], persistent=True) + self.metric.register_buffer("c", tensor(0)) + + module = TestModule() + expected_state_dict = { + "metric.a": tensor(0), + "metric.b": [], + "metric.c": tensor(0), + } + assert module.state_dict() == expected_state_dict + + +@pytest.mark.skipif(not B.cuda.is_available(), reason="Test requires GPU.") +def 
test_device_and_dtype_transfer(tmpdir): + metric = DummyMetricSum() + assert metric.x.is_cuda is False + assert metric.device == B.device("cpu") + assert metric.x.dtype == B.float32 + + metric = metric.to(device="cuda") + assert metric.x.is_cuda + assert metric.device == B.device("cuda", index=0) + + metric.set_dtype(B.double) + assert metric.x.dtype == B.float64 + metric.reset() + assert metric.x.dtype == B.float64 + + metric.set_dtype(B.half) + assert metric.x.dtype == B.float16 + metric.reset() + assert metric.x.dtype == B.float16 + + +def test_warning_on_compute_before_update(): + """test that an warning is raised if user tries to call compute before update.""" + metric = DummyMetricSum() + + # make sure everything is fine with forward + with pytest.warns(None) as record: + val = metric(1) + assert not record + + metric.reset() + + with pytest.warns(UserWarning, match=r"The ``compute`` method of metric .*"): + val = metric.compute() + assert val == 0.0 + + # after update things should be fine + metric.update(2.0) + with pytest.warns(None) as record: + val = metric.compute() + assert not record + assert val == 2.0 + + +def test_metric_scripts(): + """test that metrics are scriptable.""" + B.jit.script(DummyMetric()) + B.jit.script(DummyMetricSum()) + + +def test_metric_forward_cache_reset(): + """test that forward cache is reset when `reset` is called.""" + metric = DummyMetricSum() + _ = metric(2.0) + assert metric._forward_cache == 2.0 + metric.reset() + assert metric._forward_cache is None + + +@pytest.mark.skipif(not B.cuda.is_available(), reason="Test requires GPU.") +@pytest.mark.parametrize("metric_class", [DummyMetricSum, DummyMetricMultiOutput]) +def test_forward_and_compute_to_device(metric_class): + metric = metric_class() + metric(1) + metric.to(device="cuda") + + assert metric._forward_cache is not None + is_cuda = ( + metric._forward_cache[0].is_cuda if isinstance(metric._forward_cache, list) else metric._forward_cache.is_cuda + ) + assert is_cuda, "forward cache was not moved to the correct device" + + metric.compute() + assert metric._computed is not None + is_cuda = metric._computed[0].is_cuda if isinstance(metric._computed, list) else metric._computed.is_cuda + assert is_cuda, "computed result was not moved to the correct device" + + +@pytest.mark.skipif(not B.cuda.is_available(), reason="Test requires GPU.") +@pytest.mark.parametrize("metric_class", [DummyMetricSum, DummyMetricMultiOutput]) +def test_device_if_child_module(metric_class): + """Test that if a metric is a child module all values gets moved to the correct device.""" + + class TestModule(nn.Module): + def __init__(self): + super().__init__() + self.metric = metric_class() + self.register_buffer("dummy", B.zeros(1)) + + @property + def device(self): + return self.dummy.device + + module = TestModule() + + assert module.device == module.metric.device + if isinstance(module.metric.x, Tensor): + assert module.device == module.metric.x.device + + module.to(device="cuda") + + assert module.device == module.metric.device + if isinstance(module.metric.x, Tensor): + assert module.device == module.metric.x.device diff --git a/RE/paddlemetric/src/tests/classification/__init__.py b/RE/paddlemetric/src/tests/classification/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/classification/inputs.py b/RE/paddlemetric/src/tests/classification/inputs.py new file mode 100644 index 00000000..d0bf9488 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/inputs.py @@ -0,0 
+1,125 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple + +import paddleext.torchapi as B + +from tests.helpers.testers import BATCH_SIZE, EXTRA_DIM, NUM_BATCHES, NUM_CLASSES + +Input = namedtuple("Input", ["preds", "target"]) + +_input_binary_prob = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE), target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE)) +) + +_input_binary = Input( + preds=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE)), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE)), +) + +_input_binary_logits = Input( + preds=B.randn(NUM_BATCHES, BATCH_SIZE), target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE)) +) + +_input_multilabel_prob = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES)), +) + +_input_multilabel_multidim_prob = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, EXTRA_DIM), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, EXTRA_DIM)), +) + +_input_multilabel_logits = Input( + preds=B.randn(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES)), +) + +_input_multilabel = Input( + preds=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES)), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES)), +) + +_input_multilabel_multidim = Input( + preds=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, EXTRA_DIM)), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, EXTRA_DIM)), +) + +# Generate edge multilabel edge case, where nothing matches (scores are undefined) +__temp_preds = B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES)) +__temp_target = B.abs(__temp_preds - 1) + +_input_multilabel_no_match = Input(preds=__temp_preds, target=__temp_target) + +__mc_prob_logits = 10 * B.randn(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES) +__mc_prob_preds = __mc_prob_logits.abs() / __mc_prob_logits.abs().sum(dim=2, keepdim=True) + +_input_multiclass_prob = Input( + preds=__mc_prob_preds, target=B.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)) +) + +_input_multiclass_logits = Input( + preds=__mc_prob_logits, target=B.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)) +) + +_input_multiclass = Input( + preds=B.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), + target=B.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), +) + +__mdmc_prob_preds = B.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, EXTRA_DIM) +__mdmc_prob_preds = __mdmc_prob_preds / __mdmc_prob_preds.sum(dim=2, keepdim=True) + +_input_multidim_multiclass_prob = Input( + preds=__mdmc_prob_preds, target=B.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM)) +) + +_input_multidim_multiclass = Input( + preds=B.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM)), + target=B.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM)), +) + + +# 
Generate plausible-looking inputs +def generate_plausible_inputs_multilabel(num_classes=NUM_CLASSES, num_batches=NUM_BATCHES, batch_size=BATCH_SIZE): + correct_targets = B.randint(high=num_classes, size=(num_batches, batch_size)) + preds = B.rand(num_batches, batch_size, num_classes) + targets = B.zeros_like(preds, dtype=B.long) + for i in range(preds.shape[0]): + for j in range(preds.shape[1]): + targets[i, j, correct_targets[i, j]] = 1 + preds += B.rand(num_batches, batch_size, num_classes) * targets / 3 + + preds = preds / preds.sum(dim=2, keepdim=True) + + return Input(preds=preds, target=targets) + + +def generate_plausible_inputs_binary(num_batches=NUM_BATCHES, batch_size=BATCH_SIZE): + targets = B.randint(high=2, size=(num_batches, batch_size)) + preds = B.rand(num_batches, batch_size) + B.rand(num_batches, batch_size) * targets / 3 + return Input(preds=preds / (preds.max() + 0.01), target=targets) + + +_input_multilabel_prob_plausible = generate_plausible_inputs_multilabel() + +_input_binary_prob_plausible = generate_plausible_inputs_binary() + +# randomly remove one class from the input +_temp = B.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)) +_class_remove, _class_replace = B.multinomial(B.ones(NUM_CLASSES), num_samples=2, replacement=False) +_temp[_temp == _class_remove] = _class_replace + +_input_multiclass_with_missing_class = Input(_temp.clone(), _temp.clone()) diff --git a/RE/paddlemetric/src/tests/classification/test.log b/RE/paddlemetric/src/tests/classification/test.log new file mode 100644 index 00000000..3ea5a1d0 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test.log @@ -0,0 +1,451 @@ +============================= test session starts ============================== +platform darwin -- Python 3.8.12, pytest-7.1.2, pluggy-1.0.0 +rootdir: /Users/sun/Projects/oix/baidu/ccl/paddlemetric/src/tests/classification +plugins: hydra-core-1.1.0.dev5 +collected 816 items + +test_f_beta.py ..................ssssssssssssssssss..ssssssssssssssssss..ssssssssssssssssss..............................................................................................................ssssssssss................ss..................ss..................ss..................ss....ssssssssssssssssss..ssssssssssssssssss..ssssssssssssssssss..............................................................................................................ssssssssss................ss..................ss..................ss..................ss....ssssssssssssssssss..ssssssssssssssssss..ssssssssssssssssss..............................................................................................................ssssssssss................ss..................ss..................ss..................ss../Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/comet_ml/monkey_patching.py:19: DeprecationWarning: the imp module is deprecated in favour of importlib; see the module's documentation for alternative uses + import imp +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/comet_ml/monkey_patching.py:19: DeprecationWarning: the imp module is deprecated in favour of importlib; see the module's documentation for alternative uses + import imp +.................... 
+
+=============================== warnings summary ===============================
+../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:19
+  /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:19: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.
+    DESCRIPTOR = _descriptor.FileDescriptor(
+
+  [The same DeprecationWarning is raised once for every descriptor defined in framework_pb2.py, via the deprecated EnumDescriptor(), EnumValueDescriptor(), Descriptor() and FieldDescriptor() create functions; the repeated entries are omitted here.]
+
+../../../../../../../.envs/oix/lib/python3.8/site-packages/comet_ml/monkey_patching.py:19
+  /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/comet_ml/monkey_patching.py:19: DeprecationWarning: the imp module is deprecated in favour of importlib; see the module's documentation for alternative uses
+    import imp
+
+../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:276: 1 warning
+test_f_beta.py: 3304 warnings
+  /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:276: UserWarning: The dtype of left and right variables are not the same, left dtype is paddle.float32, but right dtype is paddle.int64, the right dtype will convert to paddle.float32
+    warnings.warn(
+
+../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/tensor/creation.py:125: 1 warning
+test_f_beta.py: 326 warnings
+  /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/creation.py:125: DeprecationWarning: `np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself. Doing this will not modify any behavior and is safe.
+  Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
+    if data.dtype == np.object:
+
+test_f_beta.py: 2012 warnings
+  /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/layers/tensor.py:657: UserWarning: paddle.assign doesn't support float64 input now due to current platform protobuf data limitation, we convert it to float32
+    warnings.warn(
+
+test_f_beta.py: 13026 warnings
+  /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/framework.py:1104: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.
+ Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations + elif dtype == np.bool: + +test_f_beta.py: 794 warnings + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:276: UserWarning: The dtype of left and right variables are not the same, left dtype is paddle.float32, but right dtype is paddle.int32, the right dtype will convert to paddle.float32 + warnings.warn( + +test_f_beta.py: 792 warnings + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:276: UserWarning: The dtype of left and right variables are not the same, left dtype is paddle.int64, but right dtype is paddle.int32, the right dtype will convert to paddle.int64 + warnings.warn( + +test_f_beta.py: 792 warnings + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:276: UserWarning: The dtype of left and right variables are not the same, left dtype is paddle.int64, but right dtype is paddle.float32, the right dtype will convert to paddle.int64 + warnings.warn( + +-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html +========= 600 passed, 216 skipped, 21149 warnings in 85.12s (0:01:25) ========== diff --git a/RE/paddlemetric/src/tests/classification/test_accuracy.py b/RE/paddlemetric/src/tests/classification/test_accuracy.py new file mode 100644 index 00000000..26ec7876 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_accuracy.py @@ -0,0 +1,362 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
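+"""Tests for the paddlemetrics ``Accuracy`` metric.
+
+These tests exercise both the module-based ``Accuracy`` class and the functional
+``accuracy`` on the paddle backend (via ``paddleext.torchapi``), comparing the results
+against reference values computed with ``sklearn.metrics.accuracy_score``.
+"""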
+from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import accuracy_score as sk_accuracy +from paddleext.torchapi import tensor + +from tests.classification.inputs import _input_binary, _input_binary_logits, _input_binary_prob +from tests.classification.inputs import _input_multiclass as _input_mcls +from tests.classification.inputs import _input_multiclass_logits as _input_mcls_logits +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multiclass_with_missing_class as _input_miss_class +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mlb +from tests.classification.inputs import _input_multilabel_logits as _input_mlb_logits +from tests.classification.inputs import _input_multilabel_multidim as _input_mlmd +from tests.classification.inputs import _input_multilabel_multidim_prob as _input_mlmd_prob +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_BATCHES, NUM_CLASSES, THRESHOLD, MetricTester +from paddlemetrics import Accuracy +from paddlemetrics.functional import accuracy +from paddlemetrics.utilities.checks import _input_format_classification +from paddlemetrics.utilities.enums import AverageMethod, DataType + +seed_all(42) + + +def _sk_accuracy(preds, target, subset_accuracy): + sk_preds, sk_target, mode = _input_format_classification(preds, target, threshold=THRESHOLD) + sk_preds, sk_target = sk_preds.numpy(), sk_target.numpy() + + if mode == DataType.MULTIDIM_MULTICLASS and not subset_accuracy: + sk_preds, sk_target = np.transpose(sk_preds, (0, 2, 1)), np.transpose(sk_target, (0, 2, 1)) + sk_preds, sk_target = sk_preds.reshape(-1, sk_preds.shape[2]), sk_target.reshape(-1, sk_target.shape[2]) + elif mode == DataType.MULTIDIM_MULTICLASS and subset_accuracy: + return np.all(sk_preds == sk_target, axis=(1, 2)).mean() + elif mode == DataType.MULTILABEL and not subset_accuracy: + sk_preds, sk_target = sk_preds.reshape(-1), sk_target.reshape(-1) + + return sk_accuracy(y_true=sk_target, y_pred=sk_preds) + + +@pytest.mark.parametrize( + "preds, target, subset_accuracy", + [ + (_input_binary_logits.preds, _input_binary_logits.target, False), + (_input_binary_prob.preds, _input_binary_prob.target, False), + (_input_binary.preds, _input_binary.target, False), + (_input_mlb_prob.preds, _input_mlb_prob.target, True), + (_input_mlb_logits.preds, _input_mlb_logits.target, False), + (_input_mlb_prob.preds, _input_mlb_prob.target, False), + (_input_mlb.preds, _input_mlb.target, True), + (_input_mlb.preds, _input_mlb.target, False), + (_input_mcls_prob.preds, _input_mcls_prob.target, False), + (_input_mcls_logits.preds, _input_mcls_logits.target, False), + (_input_mcls.preds, _input_mcls.target, False), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, False), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, True), + (_input_mdmc.preds, _input_mdmc.target, False), + (_input_mdmc.preds, _input_mdmc.target, True), + (_input_mlmd_prob.preds, _input_mlmd_prob.target, True), + (_input_mlmd_prob.preds, _input_mlmd_prob.target, False), + (_input_mlmd.preds, _input_mlmd.target, True), + (_input_mlmd.preds, _input_mlmd.target, False), + ], +) +class 
TestAccuracies(MetricTester): + @pytest.mark.parametrize("ddp", [False]) + @pytest.mark.parametrize("dist_sync_on_step", [False]) + def test_accuracy_class(self, ddp, dist_sync_on_step, preds, target, subset_accuracy): + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=Accuracy, + sk_metric=partial(_sk_accuracy, subset_accuracy=subset_accuracy), + dist_sync_on_step=dist_sync_on_step, + metric_args={"threshold": THRESHOLD, "subset_accuracy": subset_accuracy}, + ) + + def test_accuracy_fn(self, preds, target, subset_accuracy): + self.run_functional_metric_test( + preds, + target, + metric_functional=accuracy, + sk_metric=partial(_sk_accuracy, subset_accuracy=subset_accuracy), + metric_args={"threshold": THRESHOLD, "subset_accuracy": subset_accuracy}, + ) + + def test_accuracy_differentiability(self, preds, target, subset_accuracy): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=Accuracy, + metric_functional=accuracy, + metric_args={"threshold": THRESHOLD, "subset_accuracy": subset_accuracy}, + ) + + +_l1to4 = [0.1, 0.2, 0.3, 0.4] +_l1to4t3 = np.array([_l1to4, _l1to4, _l1to4]) +_l1to4t3_mcls = [_l1to4t3.T, _l1to4t3.T, _l1to4t3.T] + +# The preds in these examples always put highest probability on class 3, second highest on class 2, +# third highest on class 1, and lowest on class 0 +_topk_preds_mcls = tensor([_l1to4t3, _l1to4t3]).float() +_topk_target_mcls = tensor([[1, 2, 3], [2, 1, 0]]) + +# This is like for MC case, but one sample in each batch is sabotaged with 0 class prediction :) +_topk_preds_mdmc = tensor([_l1to4t3_mcls, _l1to4t3_mcls]).float() +_topk_target_mdmc = tensor([[[1, 1, 0], [2, 2, 2], [3, 3, 3]], [[2, 2, 0], [1, 1, 1], [0, 0, 0]]]) + +# Multilabel +_ml_t1 = [0.8, 0.2, 0.8, 0.2] +_ml_t2 = [_ml_t1, _ml_t1] +_ml_ta2 = [[1, 0, 1, 1], [0, 1, 1, 0]] +_av_preds_ml = tensor([_ml_t2, _ml_t2]).float() +_av_target_ml = tensor([_ml_ta2, _ml_ta2]) + + +# Replace with a proper sk_metric test once sklearn 0.24 hits :) +@pytest.mark.parametrize( + "preds, target, exp_result, k, subset_accuracy", + [ + (_topk_preds_mcls, _topk_target_mcls, 1 / 6, 1, False), + (_topk_preds_mcls, _topk_target_mcls, 3 / 6, 2, False), + (_topk_preds_mcls, _topk_target_mcls, 5 / 6, 3, False), + (_topk_preds_mcls, _topk_target_mcls, 1 / 6, 1, True), + (_topk_preds_mcls, _topk_target_mcls, 3 / 6, 2, True), + (_topk_preds_mcls, _topk_target_mcls, 5 / 6, 3, True), + (_topk_preds_mdmc, _topk_target_mdmc, 1 / 6, 1, False), + (_topk_preds_mdmc, _topk_target_mdmc, 8 / 18, 2, False), + (_topk_preds_mdmc, _topk_target_mdmc, 13 / 18, 3, False), + (_topk_preds_mdmc, _topk_target_mdmc, 1 / 6, 1, True), + (_topk_preds_mdmc, _topk_target_mdmc, 2 / 6, 2, True), + (_topk_preds_mdmc, _topk_target_mdmc, 3 / 6, 3, True), + (_av_preds_ml, _av_target_ml, 5 / 8, None, False), + (_av_preds_ml, _av_target_ml, 0, None, True), + ], +) +def test_topk_accuracy(preds, target, exp_result, k, subset_accuracy): + topk = Accuracy(top_k=k, subset_accuracy=subset_accuracy) + + for batch in range(preds.shape[0]): + topk(preds[batch], target[batch]) + + assert topk.compute() == exp_result + + # Test functional + total_samples = target.shape[0] * target.shape[1] + + preds = preds.view(total_samples, 4, -1) + target = target.view(total_samples, -1) + + assert accuracy(preds, target, top_k=k, subset_accuracy=subset_accuracy) == exp_result + + +# Only MC and MDMC with probs input type should be accepted for top_k +@pytest.mark.parametrize( + "preds, target", + [ + 
(_input_binary_prob.preds, _input_binary_prob.target), + (_input_binary.preds, _input_binary.target), + (_input_mlb_prob.preds, _input_mlb_prob.target), + (_input_mlb.preds, _input_mlb.target), + (_input_mcls.preds, _input_mcls.target), + (_input_mdmc.preds, _input_mdmc.target), + (_input_mlmd_prob.preds, _input_mlmd_prob.target), + (_input_mlmd.preds, _input_mlmd.target), + ], +) +def test_topk_accuracy_wrong_input_types(preds, target): + topk = Accuracy(top_k=1) + + with pytest.raises(ValueError): + topk(preds[0], target[0]) + + with pytest.raises(ValueError): + accuracy(preds[0], target[0], top_k=1) + + +@pytest.mark.parametrize( + "average, mdmc_average, num_classes, inputs, ignore_index, top_k, threshold", + [ + ("unknown", None, None, _input_binary, None, None, 0.5), + ("micro", "unknown", None, _input_binary, None, None, 0.5), + ("macro", None, None, _input_binary, None, None, 0.5), + ("micro", None, None, _input_mdmc_prob, None, None, 0.5), + ("micro", None, None, _input_binary_prob, 0, None, 0.5), + ("micro", None, None, _input_mcls_prob, NUM_CLASSES, None, 0.5), + ("micro", None, NUM_CLASSES, _input_mcls_prob, NUM_CLASSES, None, 0.5), + (None, None, None, _input_mcls_prob, None, 0, 0.5), + (None, None, None, _input_mcls_prob, None, None, 1.5), + ], +) +def test_wrong_params(average, mdmc_average, num_classes, inputs, ignore_index, top_k, threshold): + preds, target = inputs.preds, inputs.target + + with pytest.raises(ValueError): + acc = Accuracy( + average=average, + mdmc_average=mdmc_average, + num_classes=num_classes, + ignore_index=ignore_index, + threshold=threshold, + top_k=top_k, + ) + acc(preds[0], target[0]) + acc.compute() + + with pytest.raises(ValueError): + accuracy( + preds[0], + target[0], + average=average, + mdmc_average=mdmc_average, + num_classes=num_classes, + ignore_index=ignore_index, + threshold=threshold, + top_k=top_k, + ) + + +@pytest.mark.parametrize( + "preds_mc, target_mc, preds_ml, target_ml", + [ + ( + tensor([0, 1, 1, 1]), + tensor([2, 2, 1, 1]), + tensor([[0.8, 0.2, 0.8, 0.7], [0.6, 0.4, 0.6, 0.5]]), + tensor([[1, 0, 1, 1], [0, 0, 1, 0]]), + ) + ], +) +def test_different_modes(preds_mc, target_mc, preds_ml, target_ml): + acc = Accuracy() + acc(preds_mc, target_mc) + with pytest.raises(ValueError, match="^[You cannot use]"): + acc(preds_ml, target_ml) + + +_bin_t1 = [0.7, 0.6, 0.2, 0.1] +_av_preds_bin = tensor([_bin_t1, _bin_t1]).float() +_av_target_bin = tensor([[1, 0, 0, 0], [0, 1, 1, 0]]) + + +@pytest.mark.parametrize( + "preds, target, num_classes, exp_result, average, mdmc_average", + [ + (_topk_preds_mcls, _topk_target_mcls, 4, 1 / 4, "macro", None), + (_topk_preds_mcls, _topk_target_mcls, 4, 1 / 6, "weighted", None), + (_topk_preds_mcls, _topk_target_mcls, 4, [0.0, 0.0, 0.0, 1.0], "none", None), + (_topk_preds_mcls, _topk_target_mcls, 4, 1 / 6, "samples", None), + (_topk_preds_mdmc, _topk_target_mdmc, 4, 1 / 24, "macro", "samplewise"), + (_topk_preds_mdmc, _topk_target_mdmc, 4, 1 / 6, "weighted", "samplewise"), + (_topk_preds_mdmc, _topk_target_mdmc, 4, [0.0, 0.0, 0.0, 1 / 6], "none", "samplewise"), + (_topk_preds_mdmc, _topk_target_mdmc, 4, 1 / 6, "samples", "samplewise"), + (_topk_preds_mdmc, _topk_target_mdmc, 4, 1 / 6, "samples", "global"), + (_av_preds_ml, _av_target_ml, 4, 5 / 8, "macro", None), + (_av_preds_ml, _av_target_ml, 4, 0.70000005, "weighted", None), + (_av_preds_ml, _av_target_ml, 4, [1 / 2, 1 / 2, 1.0, 1 / 2], "none", None), + (_av_preds_ml, _av_target_ml, 4, 5 / 8, "samples", None), + ], +) +def 
test_average_accuracy(preds, target, num_classes, exp_result, average, mdmc_average): + acc = Accuracy(num_classes=num_classes, average=average, mdmc_average=mdmc_average) + + for batch in range(preds.shape[0]): + acc(preds[batch], target[batch]) + + assert B.allclose(acc.compute(), tensor(exp_result)) + + # Test functional + total_samples = target.shape[0] * target.shape[1] + + preds = preds.view(total_samples, num_classes, -1) + target = target.view(total_samples, -1) + + acc_score = accuracy(preds, target, num_classes=num_classes, average=average, mdmc_average=mdmc_average) + assert B.allclose(acc_score, tensor(exp_result)) + + +@pytest.mark.parametrize( + "preds, target, num_classes, exp_result, average, multiclass", + [ + (_av_preds_bin, _av_target_bin, 2, 19 / 30, "macro", True), + (_av_preds_bin, _av_target_bin, 2, 5 / 8, "weighted", True), + (_av_preds_bin, _av_target_bin, 2, [3 / 5, 2 / 3], "none", True), + (_av_preds_bin, _av_target_bin, 2, 5 / 8, "samples", True), + ], +) +def test_average_accuracy_bin(preds, target, num_classes, exp_result, average, multiclass): + acc = Accuracy(num_classes=num_classes, average=average, multiclass=multiclass) + + for batch in range(preds.shape[0]): + acc(preds[batch], target[batch]) + + assert (acc.compute() == tensor(exp_result)).all() + + # Test functional + total_samples = target.shape[0] * target.shape[1] + + preds = preds.view(total_samples, -1) + target = target.view(total_samples, -1) + acc_score = accuracy(preds, target, num_classes=num_classes, average=average, multiclass=multiclass) + assert (acc_score == tensor(exp_result)).all() + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Accuracy, accuracy)]) +@pytest.mark.parametrize( + "ignore_index, expected", [(None, B.tensor([1.0, np.nan])), (0, B.tensor([np.nan, np.nan]))] +) +def test_class_not_present(metric_class, metric_fn, ignore_index, expected): + """This tests that when metric is computed per class and a given class is not present in both the `preds` and + `target`, the resulting score is `nan`.""" + preds = B.tensor([0, 0, 0]) + target = B.tensor([0, 0, 0]) + num_classes = 2 + + # test functional + result_fn = metric_fn(preds, target, average=AverageMethod.NONE, num_classes=num_classes, ignore_index=ignore_index) + assert B.allclose(expected, result_fn, equal_nan=True) + + # test class + cl_metric = metric_class(average=AverageMethod.NONE, num_classes=num_classes, ignore_index=ignore_index) + cl_metric(preds, target) + result_cl = cl_metric.compute() + assert B.allclose(expected, result_cl, equal_nan=True) + + +@pytest.mark.parametrize("average", ["micro", "macro", "weighted"]) +def test_same_input(average): + preds = _input_miss_class.preds + target = _input_miss_class.target + preds_flat = B.cat(list(preds), dim=0) + target_flat = B.cat(list(target), dim=0) + + mc = Accuracy(num_classes=NUM_CLASSES, average=average) + for i in range(NUM_BATCHES): + mc.update(preds[i], target[i]) + class_res = mc.compute() + func_res = accuracy(preds_flat, target_flat, num_classes=NUM_CLASSES, average=average) + sk_res = sk_accuracy(target_flat, preds_flat) + + assert B.allclose(class_res, B.tensor(sk_res).float()) + assert B.allclose(func_res, B.tensor(sk_res).float()) diff --git a/RE/paddlemetric/src/tests/classification/test_auc.py b/RE/paddlemetric/src/tests/classification/test_auc.py new file mode 100644 index 00000000..df6e3ff7 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_auc.py @@ -0,0 +1,106 @@ +# Copyright The PyTorch Lightning team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple +from functools import partial + +import numpy as np +import pytest +from sklearn.metrics import auc as _sk_auc +from paddleext.torchapi import tensor + +from tests.helpers import seed_all +from tests.helpers.testers import NUM_BATCHES, MetricTester +from paddlemetrics.classification.auc import AUC +from paddlemetrics.functional import auc + +seed_all(42) + + +def sk_auc(x, y, reorder=False): + x = x.flatten() + y = y.flatten() + if reorder: + idx = np.argsort(x, kind="stable") + x = x[idx] + y = y[idx] + return _sk_auc(x, y) + + +Input = namedtuple("Input", ["x", "y"]) + +_examples = [] +# generate already ordered samples, sorted in both directions +for batch_size in (8, 4049): + for i in range(4): + x = np.random.rand(NUM_BATCHES * batch_size) + y = np.random.rand(NUM_BATCHES * batch_size) + idx = np.argsort(x, kind="stable") + x = x[idx] if i % 2 == 0 else x[idx[::-1]] + y = y[idx] if i % 2 == 0 else x[idx[::-1]] + x = x.reshape(NUM_BATCHES, batch_size) + y = y.reshape(NUM_BATCHES, batch_size) + _examples.append(Input(x=tensor(x), y=tensor(y))) + + +@pytest.mark.parametrize("x, y", _examples) +class TestAUC(MetricTester): + @pytest.mark.parametrize("ddp", [False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_auc(self, x, y, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp=ddp, + preds=x, + target=y, + metric_class=AUC, + sk_metric=sk_auc, + dist_sync_on_step=dist_sync_on_step, + ) + + @pytest.mark.parametrize("reorder", [True, False]) + def test_auc_functional(self, x, y, reorder): + self.run_functional_metric_test( + x, y, metric_functional=auc, sk_metric=partial(sk_auc, reorder=reorder), metric_args={"reorder": reorder} + ) + + @pytest.mark.parametrize("reorder", [True, False]) + def test_auc_differentiability(self, x, y, reorder): + self.run_differentiability_test( + preds=x, target=y, metric_module=AUC, metric_functional=auc, metric_args={"reorder": reorder} + ) + + +@pytest.mark.parametrize("unsqueeze_x", (True, False)) +@pytest.mark.parametrize("unsqueeze_y", (True, False)) +@pytest.mark.parametrize( + ["x", "y", "expected"], + [ + pytest.param([0, 1], [0, 1], 0.5), + pytest.param([1, 0], [0, 1], 0.5), + pytest.param([1, 0, 0], [0, 1, 1], 0.5), + pytest.param([0, 1], [1, 1], 1), + pytest.param([0, 0.5, 1], [0, 0.5, 1], 0.5), + ], +) +def test_auc(x, y, expected, unsqueeze_x, unsqueeze_y): + x = tensor(x) + y = tensor(y) + + if unsqueeze_x: + x = x.unsqueeze(-1) + + if unsqueeze_y: + y = y.unsqueeze(-1) + + # Test Area Under Curve (AUC) computation + assert auc(x, y, reorder=True) == expected diff --git a/RE/paddlemetric/src/tests/classification/test_auroc.py b/RE/paddlemetric/src/tests/classification/test_auroc.py new file mode 100644 index 00000000..36b43611 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_auroc.py @@ -0,0 +1,218 @@ +# Copyright The PyTorch Lightning team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from functools import partial + +import pytest +import paddleext.torchapi as B +from sklearn.metrics import roc_auc_score as sk_roc_auc_score + +from tests.classification.inputs import _input_binary_prob +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel_multidim_prob as _input_mlmd_prob +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, MetricTester +from paddlemetrics.classification.auroc import AUROC +from paddlemetrics.functional import auroc +from paddlemetrics.utilities.imports import _TORCH_LOWER_1_6 + +seed_all(42) + + +def _sk_auroc_binary_prob(preds, target, num_classes, average="macro", max_fpr=None, multi_class="ovr"): + # todo: `multi_class` is unused + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + return sk_roc_auc_score(y_true=sk_target, y_score=sk_preds, average=average, max_fpr=max_fpr) + + +def _sk_auroc_multiclass_prob(preds, target, num_classes, average="macro", max_fpr=None, multi_class="ovr"): + sk_preds = preds.reshape(-1, num_classes).numpy() + sk_target = target.view(-1).numpy() + return sk_roc_auc_score( + y_true=sk_target, + y_score=sk_preds, + average=average, + max_fpr=max_fpr, + multi_class=multi_class, + ) + + +def _sk_auroc_multidim_multiclass_prob(preds, target, num_classes, average="macro", max_fpr=None, multi_class="ovr"): + sk_preds = preds.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1).numpy() + sk_target = target.view(-1).numpy() + return sk_roc_auc_score( + y_true=sk_target, + y_score=sk_preds, + average=average, + max_fpr=max_fpr, + multi_class=multi_class, + ) + + +def _sk_auroc_multilabel_prob(preds, target, num_classes, average="macro", max_fpr=None, multi_class="ovr"): + sk_preds = preds.reshape(-1, num_classes).numpy() + sk_target = target.reshape(-1, num_classes).numpy() + return sk_roc_auc_score( + y_true=sk_target, + y_score=sk_preds, + average=average, + max_fpr=max_fpr, + multi_class=multi_class, + ) + + +def _sk_auroc_multilabel_multidim_prob(preds, target, num_classes, average="macro", max_fpr=None, multi_class="ovr"): + sk_preds = preds.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1).numpy() + sk_target = target.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1).numpy() + return sk_roc_auc_score( + y_true=sk_target, + y_score=sk_preds, + average=average, + max_fpr=max_fpr, + multi_class=multi_class, + ) + + +@pytest.mark.parametrize("average", ["macro", "weighted", "micro"]) +@pytest.mark.parametrize("max_fpr", [None, 0.8, 0.5]) +@pytest.mark.parametrize( + "preds, target, sk_metric, num_classes", + [ + (_input_binary_prob.preds, _input_binary_prob.target, _sk_auroc_binary_prob, 1), + (_input_mcls_prob.preds, _input_mcls_prob.target, 
_sk_auroc_multiclass_prob, NUM_CLASSES), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_auroc_multidim_multiclass_prob, NUM_CLASSES), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_auroc_multilabel_prob, NUM_CLASSES), + (_input_mlmd_prob.preds, _input_mlmd_prob.target, _sk_auroc_multilabel_multidim_prob, NUM_CLASSES), + ], +) +class TestAUROC(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_auroc(self, preds, target, sk_metric, num_classes, average, max_fpr, ddp, dist_sync_on_step): + # max_fpr different from None is not support in multi class + if max_fpr is not None and num_classes != 1: + pytest.skip("max_fpr parameter not support for multi class or multi label") + + # max_fpr only supported for torch v1.6 or higher + if max_fpr is not None and _TORCH_LOWER_1_6: + pytest.skip("requires torch v1.6 or higher to test max_fpr argument") + + # average='micro' only supported for multilabel + if average == "micro" and preds.ndim > 2 and preds.ndim == target.ndim + 1: + pytest.skip("micro argument only support for multilabel input") + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=AUROC, + sk_metric=partial(sk_metric, num_classes=num_classes, average=average, max_fpr=max_fpr), + dist_sync_on_step=dist_sync_on_step, + metric_args={"num_classes": num_classes, "average": average, "max_fpr": max_fpr}, + ) + + def test_auroc_functional(self, preds, target, sk_metric, num_classes, average, max_fpr): + # max_fpr different from None is not support in multi class + if max_fpr is not None and num_classes != 1: + pytest.skip("max_fpr parameter not support for multi class or multi label") + + # max_fpr only supported for torch v1.6 or higher + if max_fpr is not None and _TORCH_LOWER_1_6: + pytest.skip("requires torch v1.6 or higher to test max_fpr argument") + + # average='micro' only supported for multilabel + if average == "micro" and preds.ndim > 2 and preds.ndim == target.ndim + 1: + pytest.skip("micro argument only support for multilabel input") + + self.run_functional_metric_test( + preds, + target, + metric_functional=auroc, + sk_metric=partial(sk_metric, num_classes=num_classes, average=average, max_fpr=max_fpr), + metric_args={"num_classes": num_classes, "average": average, "max_fpr": max_fpr}, + ) + + def test_auroc_differentiability(self, preds, target, sk_metric, num_classes, average, max_fpr): + # max_fpr different from None is not support in multi class + if max_fpr is not None and num_classes != 1: + pytest.skip("max_fpr parameter not support for multi class or multi label") + + # max_fpr only supported for torch v1.6 or higher + if max_fpr is not None and _TORCH_LOWER_1_6: + pytest.skip("requires torch v1.6 or higher to test max_fpr argument") + + # average='micro' only supported for multilabel + if average == "micro" and preds.ndim > 2 and preds.ndim == target.ndim + 1: + pytest.skip("micro argument only support for multilabel input") + + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=AUROC, + metric_functional=auroc, + metric_args={"num_classes": num_classes, "average": average, "max_fpr": max_fpr}, + ) + + +def test_error_on_different_mode(): + """test that an error is raised if the user pass in data of different modes (binary, multi-label, multi- + class)""" + metric = AUROC() + # pass in multi-class data + metric.update(B.randn(10, 5).softmax(dim=-1), B.randint(0, 5, (10,))) + with pytest.raises(ValueError, 
match=r"The mode of data.* should be constant.*"): + # pass in multi-label data + metric.update(B.rand(10, 5), B.randint(0, 2, (10, 5))) + + +def test_error_multiclass_no_num_classes(): + with pytest.raises( + ValueError, match="Detected input to `multiclass` but you did not provide `num_classes` argument" + ): + _ = auroc(B.randn(20, 3).softmax(dim=-1), B.randint(3, (20,))) + + +def test_weighted_with_empty_classes(): + """Tests that weighted multiclass AUROC calculation yields the same results if a new but empty class exists. + + Tests that the proper warnings and errors are raised + """ + preds = B.tensor( + [ + [0.90, 0.05, 0.05], + [0.05, 0.90, 0.05], + [0.05, 0.05, 0.90], + [0.85, 0.05, 0.10], + [0.10, 0.10, 0.80], + ] + ) + target = B.tensor([0, 1, 1, 2, 2]) + num_classes = 3 + _auroc = auroc(preds, target, average="weighted", num_classes=num_classes) + + # Add in a class with zero observations at second to last index + preds = B.cat( + (preds[:, : num_classes - 1], B.rand_like(preds[:, 0:1]), preds[:, num_classes - 1 :]), axis=1 + ) + # Last class (2) gets moved to 3 + target[target == num_classes - 1] = num_classes + with pytest.warns(UserWarning, match="Class 2 had 0 observations, omitted from AUROC calculation"): + _auroc_empty_class = auroc(preds, target, average="weighted", num_classes=num_classes + 1) + assert _auroc == _auroc_empty_class + + target = B.zeros_like(target) + with pytest.raises(ValueError, match="Found 1 non-empty class in `multiclass` AUROC calculation"): + _ = auroc(preds, target, average="weighted", num_classes=num_classes + 1) diff --git a/RE/paddlemetric/src/tests/classification/test_average_precision.py b/RE/paddlemetric/src/tests/classification/test_average_precision.py new file mode 100644 index 00000000..aea088cc --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_average_precision.py @@ -0,0 +1,170 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from functools import partial + +import numpy as np +import pytest +from sklearn.metrics import average_precision_score as sk_average_precision_score +from paddleext.torchapi import tensor + +from tests.classification.inputs import _input_binary_prob +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, MetricTester +from paddlemetrics.classification.average_precision import AveragePrecision +from paddlemetrics.functional import average_precision + +seed_all(42) + + +def _sk_average_precision_score(y_true, probas_pred, num_classes=1, average=None): + if num_classes == 1: + return sk_average_precision_score(y_true, probas_pred) + + res = [] + for i in range(num_classes): + y_true_temp = np.zeros_like(y_true) + y_true_temp[y_true == i] = 1 + res.append(sk_average_precision_score(y_true_temp, probas_pred[:, i])) + + if average == "macro": + return np.array(res).mean() + if average == "weighted": + weights = np.bincount(y_true) if y_true.max() > 1 else y_true.sum(axis=0) + weights = weights / sum(weights) + return (np.array(res) * weights).sum() + + return res + + +def _sk_avg_prec_binary_prob(preds, target, num_classes=1, average=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return _sk_average_precision_score(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes, average=average) + + +def _sk_avg_prec_multiclass_prob(preds, target, num_classes=1, average=None): + sk_preds = preds.reshape(-1, num_classes).numpy() + sk_target = target.view(-1).numpy() + + return _sk_average_precision_score(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes, average=average) + + +def _sk_avg_prec_multilabel_prob(preds, target, num_classes=1, average=None): + sk_preds = preds.reshape(-1, num_classes).numpy() + sk_target = target.view(-1, num_classes).numpy() + return sk_average_precision_score(sk_target, sk_preds, average=average) + + +def _sk_avg_prec_multidim_multiclass_prob(preds, target, num_classes=1, average=None): + sk_preds = preds.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1).numpy() + sk_target = target.view(-1).numpy() + return _sk_average_precision_score(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes, average=average) + + +@pytest.mark.parametrize( + "preds, target, sk_metric, num_classes", + [ + (_input_binary_prob.preds, _input_binary_prob.target, _sk_avg_prec_binary_prob, 1), + (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_avg_prec_multiclass_prob, NUM_CLASSES), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_avg_prec_multidim_multiclass_prob, NUM_CLASSES), + (_input_multilabel.preds, _input_multilabel.target, _sk_avg_prec_multilabel_prob, NUM_CLASSES), + ], +) +@pytest.mark.parametrize("average", ["micro", "macro", "weighted", None]) +class TestAveragePrecision(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_average_precision(self, preds, target, sk_metric, num_classes, average, ddp, dist_sync_on_step): + if target.max() > 1 and average == "micro": + pytest.skip("average=micro and multiclass input cannot be used together") + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=AveragePrecision, + 
sk_metric=partial(sk_metric, num_classes=num_classes, average=average), + dist_sync_on_step=dist_sync_on_step, + metric_args={"num_classes": num_classes, "average": average}, + ) + + def test_average_precision_functional(self, preds, target, sk_metric, num_classes, average): + if target.max() > 1 and average == "micro": + pytest.skip("average=micro and multiclass input cannot be used together") + + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=average_precision, + sk_metric=partial(sk_metric, num_classes=num_classes, average=average), + metric_args={"num_classes": num_classes, "average": average}, + ) + + def test_average_precision_differentiability(self, preds, sk_metric, target, num_classes, average): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=AveragePrecision, + metric_functional=average_precision, + metric_args={"num_classes": num_classes}, + ) + + +@pytest.mark.parametrize( + ["scores", "target", "expected_score"], + [ + # Check the average_precision_score of a constant predictor is + # the TPR + # Generate a dataset with 25% of positives + # And a constant score + # The precision is then the fraction of positive whatever the recall + # is, as there is only one threshold: + pytest.param(tensor([1, 1, 1, 1]), tensor([0, 0, 0, 1]), 0.25), + # With threshold 0.8 : 1 TP and 2 TN and one FN + pytest.param(tensor([0.6, 0.7, 0.8, 9]), tensor([1, 0, 0, 1]), 0.75), + ], +) +def test_average_precision(scores, target, expected_score): + assert average_precision(scores, target) == expected_score + + +def test_average_precision_warnings_and_errors(): + """Test that the correct errors and warnings gets raised.""" + + # check average argument + with pytest.raises(ValueError, match="Expected argument `average` to be one .*"): + AveragePrecision(num_classes=5, average="samples") + + # check that micro average cannot be used with multilabel input + pred = tensor( + [ + [0.75, 0.05, 0.05, 0.05, 0.05], + [0.05, 0.75, 0.05, 0.05, 0.05], + [0.05, 0.05, 0.75, 0.05, 0.05], + [0.05, 0.05, 0.05, 0.75, 0.05], + ] + ) + target = tensor([0, 1, 3, 2]) + average_precision = AveragePrecision(num_classes=5, average="micro") + with pytest.raises(ValueError, match="Cannot use `micro` average with multi-class input"): + average_precision(pred, target) + + # check that warning is thrown when average=macro and nan is encoutered in individual scores + average_precision = AveragePrecision(num_classes=5, average="macro") + with pytest.warns(UserWarning, match="Average precision score for one or more classes was `nan`.*"): + average_precision(pred, target) diff --git a/RE/paddlemetric/src/tests/classification/test_binned_precision_recall.py b/RE/paddlemetric/src/tests/classification/test_binned_precision_recall.py new file mode 100644 index 00000000..a1ea3376 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_binned_precision_recall.py @@ -0,0 +1,129 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import partial +from typing import Tuple + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import average_precision_score as _sk_average_precision_score +from sklearn.metrics import precision_recall_curve as _sk_precision_recall_curve +from paddleext.torchapi import Tensor + +from tests.classification.inputs import _input_binary_prob +from tests.classification.inputs import _input_binary_prob_plausible as _input_binary_prob_ok +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.classification.inputs import _input_multilabel_prob_plausible as _input_mlb_prob_ok +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, MetricTester +from paddlemetrics.classification.binned_precision_recall import BinnedAveragePrecision, BinnedRecallAtFixedPrecision + +seed_all(42) + + +def recall_at_precision_x_multilabel(predictions: Tensor, targets: Tensor, min_precision: float) -> Tuple[float, float]: + precision, recall, thresholds = _sk_precision_recall_curve(targets, predictions) + + try: + tuple_all = [(r, p, t) for p, r, t in zip(precision, recall, thresholds) if p >= min_precision] + max_recall, _, best_threshold = max(tuple_all) + except ValueError: + max_recall, best_threshold = 0, 1e6 + + return float(max_recall), float(best_threshold) + + +def _sk_prec_recall_mclass_prob(predictions, targets, num_classes, min_precision): + max_recalls = B.zeros(num_classes) + best_thresholds = B.zeros(num_classes) + + for i in range(num_classes): + max_recalls[i], best_thresholds[i] = recall_at_precision_x_multilabel( + predictions[:, i], targets[:, i], min_precision + ) + return max_recalls, best_thresholds + + +def _sk_prec_recall_binary_prob(predictions, targets, num_classes, min_precision): + return recall_at_precision_x_multilabel(predictions, targets, min_precision) + + +def _sk_avg_prec_multiclass(predictions, targets, num_classes): + # replace nan with 0 + return np.nan_to_num(_sk_average_precision_score(targets, predictions, average=None)) + + +@pytest.mark.parametrize( + "preds, target, sk_metric, num_classes", + [ + (_input_binary_prob.preds, _input_binary_prob.target, _sk_prec_recall_binary_prob, 1), + (_input_binary_prob_ok.preds, _input_binary_prob_ok.target, _sk_prec_recall_binary_prob, 1), + (_input_mlb_prob_ok.preds, _input_mlb_prob_ok.target, _sk_prec_recall_mclass_prob, NUM_CLASSES), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_prec_recall_mclass_prob, NUM_CLASSES), + ], +) +class TestBinnedRecallAtPrecision(MetricTester): + atol = 0.02 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("min_precision", [0.05, 0.1, 0.3, 0.5, 0.8, 0.95]) + def test_binned_recall_at_precision( + self, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step, min_precision + ): + # rounding will simulate binning for both implementations + preds = Tensor(np.round(preds.numpy(), 2)) + 1e-6 + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=BinnedRecallAtFixedPrecision, + sk_metric=partial(sk_metric, num_classes=num_classes, min_precision=min_precision), + dist_sync_on_step=dist_sync_on_step, + metric_args={ + "num_classes": num_classes, + "min_precision": min_precision, + "thresholds": 101, + }, + ) + + +@pytest.mark.parametrize( + "preds, 
target, sk_metric, num_classes", + [ + (_input_binary_prob.preds, _input_binary_prob.target, _sk_avg_prec_multiclass, 1), + (_input_binary_prob_ok.preds, _input_binary_prob_ok.target, _sk_avg_prec_multiclass, 1), + (_input_mlb_prob_ok.preds, _input_mlb_prob_ok.target, _sk_avg_prec_multiclass, NUM_CLASSES), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_avg_prec_multiclass, NUM_CLASSES), + ], +) +class TestBinnedAveragePrecision(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("thresholds", (301, B.linspace(0.0, 1.0, 101))) + def test_binned_average_precision(self, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step, thresholds): + # rounding will simulate binning for both implementations + preds = Tensor(np.round(preds.numpy(), 2)) + 1e-6 + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=BinnedAveragePrecision, + sk_metric=partial(sk_metric, num_classes=num_classes), + dist_sync_on_step=dist_sync_on_step, + metric_args={"num_classes": num_classes, "thresholds": thresholds}, + ) diff --git a/RE/paddlemetric/src/tests/classification/test_calibration_error.py b/RE/paddlemetric/src/tests/classification/test_calibration_error.py new file mode 100644 index 00000000..f0a470fc --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_calibration_error.py @@ -0,0 +1,114 @@ +import functools +import re + +import numpy as np +import pytest + +from tests.classification.inputs import _input_binary_prob +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all + +# TODO: replace this with official sklearn implementation after next sklearn release +from tests.helpers.non_sklearn_metrics import calibration_error as sk_calib +from tests.helpers.testers import THRESHOLD, MetricTester +from paddlemetrics import CalibrationError +from paddlemetrics.functional import calibration_error +from paddlemetrics.utilities.checks import _input_format_classification +from paddlemetrics.utilities.enums import DataType + +seed_all(42) + + +def _sk_calibration(preds, target, n_bins, norm, debias=False): + _, _, mode = _input_format_classification(preds, target, threshold=THRESHOLD) + sk_preds, sk_target = preds.numpy(), target.numpy() + + if mode == DataType.MULTICLASS: + # binary label is whether or not the predicted class is correct + sk_target = np.equal(np.argmax(sk_preds, axis=1), sk_target) + sk_preds = np.max(sk_preds, axis=1) + elif mode == DataType.MULTIDIM_MULTICLASS: + # reshape from shape (N, C, ...) to (N*EXTRA_DIMS, C) + sk_preds = np.transpose(sk_preds, axes=(0, 2, 1)) + sk_preds = sk_preds.reshape(np.prod(sk_preds.shape[:-1]), sk_preds.shape[-1]) + # reshape from shape (N, ...) 
to (N*EXTRA_DIMS,) + # binary label is whether or not the predicted class is correct + sk_target = np.equal(np.argmax(sk_preds, axis=1), sk_target.flatten()) + sk_preds = np.max(sk_preds, axis=1) + return sk_calib(y_true=sk_target, y_prob=sk_preds, norm=norm, n_bins=n_bins, reduce_bias=debias) + + +@pytest.mark.parametrize("n_bins", [10, 15, 20]) +@pytest.mark.parametrize("norm", ["l1", "l2", "max"]) +@pytest.mark.parametrize( + "preds, target", + [ + (_input_binary_prob.preds, _input_binary_prob.target), + (_input_mcls_prob.preds, _input_mcls_prob.target), + (_input_mdmc_prob.preds, _input_mdmc_prob.target), + ], +) +class TestCE(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_ce(self, preds, target, n_bins, ddp, dist_sync_on_step, norm): + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=CalibrationError, + sk_metric=functools.partial(_sk_calibration, n_bins=n_bins, norm=norm), + dist_sync_on_step=dist_sync_on_step, + metric_args={"n_bins": n_bins, "norm": norm}, + ) + + def test_ce_functional(self, preds, target, n_bins, norm): + self.run_functional_metric_test( + preds, + target, + metric_functional=calibration_error, + sk_metric=functools.partial(_sk_calibration, n_bins=n_bins, norm=norm), + metric_args={"n_bins": n_bins, "norm": norm}, + ) + + +@pytest.mark.parametrize("preds, targets", [(_input_mlb_prob.preds, _input_mlb_prob.target)]) +def test_invalid_input(preds, targets): + for p, t in zip(preds, targets): + with pytest.raises( + ValueError, + match=re.escape( + f"Calibration error is not well-defined for data with size {p.size()} and targets {t.size()}." + ), + ): + calibration_error(p, t) + + +@pytest.mark.parametrize( + "preds, target", + [ + (_input_binary_prob.preds, _input_binary_prob.target), + (_input_mcls_prob.preds, _input_mcls_prob.target), + (_input_mdmc_prob.preds, _input_mdmc_prob.target), + ], +) +def test_invalid_norm(preds, target): + with pytest.raises(ValueError, match="Norm l3 is not supported. Please select from l1, l2, or max. 
"): + calibration_error(preds, target, norm="l3") + + +@pytest.mark.parametrize("n_bins", [-10, -1, "fsd"]) +@pytest.mark.parametrize( + "preds, targets", + [ + (_input_binary_prob.preds, _input_binary_prob.target), + (_input_mcls_prob.preds, _input_mcls_prob.target), + (_input_mdmc_prob.preds, _input_mdmc_prob.target), + ], +) +def test_invalid_bins(preds, targets, n_bins): + for p, t in zip(preds, targets): + with pytest.raises(ValueError, match=f"Expected argument `n_bins` to be a int larger than 0 but got {n_bins}"): + calibration_error(p, t, n_bins=n_bins) diff --git a/RE/paddlemetric/src/tests/classification/test_cohen_kappa.py b/RE/paddlemetric/src/tests/classification/test_cohen_kappa.py new file mode 100644 index 00000000..d79cc8d8 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_cohen_kappa.py @@ -0,0 +1,133 @@ +from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import cohen_kappa_score as sk_cohen_kappa + +from tests.classification.inputs import _input_binary, _input_binary_prob +from tests.classification.inputs import _input_multiclass as _input_mcls +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mlb +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, THRESHOLD, MetricTester +from paddlemetrics.classification.cohen_kappa import CohenKappa +from paddlemetrics.functional.classification.cohen_kappa import cohen_kappa + +seed_all(42) + + +def _sk_cohen_kappa_binary_prob(preds, target, weights=None): + sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) + sk_target = target.view(-1).numpy() + + return sk_cohen_kappa(y1=sk_target, y2=sk_preds, weights=weights) + + +def _sk_cohen_kappa_binary(preds, target, weights=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_cohen_kappa(y1=sk_target, y2=sk_preds, weights=weights) + + +def _sk_cohen_kappa_multilabel_prob(preds, target, weights=None): + sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) + sk_target = target.view(-1).numpy() + + return sk_cohen_kappa(y1=sk_target, y2=sk_preds, weights=weights) + + +def _sk_cohen_kappa_multilabel(preds, target, weights=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_cohen_kappa(y1=sk_target, y2=sk_preds, weights=weights) + + +def _sk_cohen_kappa_multiclass_prob(preds, target, weights=None): + sk_preds = B.argmax(preds, dim=len(preds.shape) - 1).view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_cohen_kappa(y1=sk_target, y2=sk_preds, weights=weights) + + +def _sk_cohen_kappa_multiclass(preds, target, weights=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_cohen_kappa(y1=sk_target, y2=sk_preds, weights=weights) + + +def _sk_cohen_kappa_multidim_multiclass_prob(preds, target, weights=None): + sk_preds = B.argmax(preds, dim=len(preds.shape) - 2).view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_cohen_kappa(y1=sk_target, y2=sk_preds, weights=weights) + + +def _sk_cohen_kappa_multidim_multiclass(preds, target, weights=None): + 
sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_cohen_kappa(y1=sk_target, y2=sk_preds, weights=weights) + + +@pytest.mark.parametrize("weights", ["linear", "quadratic", None]) +@pytest.mark.parametrize( + "preds, target, sk_metric, num_classes", + [ + (_input_binary_prob.preds, _input_binary_prob.target, _sk_cohen_kappa_binary_prob, 2), + (_input_binary.preds, _input_binary.target, _sk_cohen_kappa_binary, 2), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_cohen_kappa_multilabel_prob, 2), + (_input_mlb.preds, _input_mlb.target, _sk_cohen_kappa_multilabel, 2), + (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_cohen_kappa_multiclass_prob, NUM_CLASSES), + (_input_mcls.preds, _input_mcls.target, _sk_cohen_kappa_multiclass, NUM_CLASSES), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_cohen_kappa_multidim_multiclass_prob, NUM_CLASSES), + (_input_mdmc.preds, _input_mdmc.target, _sk_cohen_kappa_multidim_multiclass, NUM_CLASSES), + ], +) +class TestCohenKappa(MetricTester): + atol = 1e-5 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_cohen_kappa(self, weights, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=CohenKappa, + sk_metric=partial(sk_metric, weights=weights), + dist_sync_on_step=dist_sync_on_step, + metric_args={"num_classes": num_classes, "threshold": THRESHOLD, "weights": weights}, + ) + + def test_cohen_kappa_functional(self, weights, preds, target, sk_metric, num_classes): + self.run_functional_metric_test( + preds, + target, + metric_functional=cohen_kappa, + sk_metric=partial(sk_metric, weights=weights), + metric_args={"num_classes": num_classes, "threshold": THRESHOLD, "weights": weights}, + ) + + def test_cohen_kappa_differentiability(self, preds, target, sk_metric, weights, num_classes): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=CohenKappa, + metric_functional=cohen_kappa, + metric_args={"num_classes": num_classes, "threshold": THRESHOLD, "weights": weights}, + ) + + +def test_warning_on_wrong_weights(tmpdir): + preds = B.randint(3, size=(20,)) + target = B.randint(3, size=(20,)) + + with pytest.raises(ValueError, match=".* ``weights`` but should be either None, 'linear' or 'quadratic'"): + cohen_kappa(preds, target, num_classes=3, weights="unknown_arg") diff --git a/RE/paddlemetric/src/tests/classification/test_confusion_matrix.py b/RE/paddlemetric/src/tests/classification/test_confusion_matrix.py new file mode 100644 index 00000000..9ae6fa81 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_confusion_matrix.py @@ -0,0 +1,188 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
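+# `normalize` follows the sklearn convention: "true" normalizes each row (over the true
+# class), "pred" each column (over the predicted class), and "all" the whole matrix.
+# sklearn's `multilabel_confusion_matrix` has no such argument, so the multilabel
+# reference wrappers below apply the normalization manually.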
+from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import confusion_matrix as sk_confusion_matrix +from sklearn.metrics import multilabel_confusion_matrix as sk_multilabel_confusion_matrix + +from tests.classification.inputs import _input_binary, _input_binary_logits, _input_binary_prob +from tests.classification.inputs import _input_multiclass as _input_mcls +from tests.classification.inputs import _input_multiclass_logits as _input_mcls_logits +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mlb +from tests.classification.inputs import _input_multilabel_logits as _input_mlb_logits +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, THRESHOLD, MetricTester +from paddlemetrics.classification.confusion_matrix import ConfusionMatrix +from paddlemetrics.functional import confusion_matrix + +seed_all(42) + + +def _sk_cm_binary_prob(preds, target, normalize=None): + sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) + sk_target = target.view(-1).numpy() + + return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) + + +def _sk_cm_binary(preds, target, normalize=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) + + +def _sk_cm_multilabel_prob(preds, target, normalize=None): + sk_preds = (preds.numpy() >= THRESHOLD).astype(np.uint8) + sk_target = target.numpy() + + cm = sk_multilabel_confusion_matrix(y_true=sk_target, y_pred=sk_preds) + if normalize is not None: + if normalize == "true": + cm = cm / cm.sum(axis=1, keepdims=True) + elif normalize == "pred": + cm = cm / cm.sum(axis=0, keepdims=True) + elif normalize == "all": + cm = cm / cm.sum() + cm[np.isnan(cm)] = 0 + return cm + + +def _sk_cm_multilabel(preds, target, normalize=None): + sk_preds = preds.numpy() + sk_target = target.numpy() + + cm = sk_multilabel_confusion_matrix(y_true=sk_target, y_pred=sk_preds) + if normalize is not None: + if normalize == "true": + cm = cm / cm.sum(axis=1, keepdims=True) + elif normalize == "pred": + cm = cm / cm.sum(axis=0, keepdims=True) + elif normalize == "all": + cm = cm / cm.sum() + cm[np.isnan(cm)] = 0 + return cm + + +def _sk_cm_multiclass_prob(preds, target, normalize=None): + sk_preds = B.argmax(preds, dim=len(preds.shape) - 1).view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) + + +def _sk_cm_multiclass(preds, target, normalize=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) + + +def _sk_cm_multidim_multiclass_prob(preds, target, normalize=None): + sk_preds = B.argmax(preds, dim=len(preds.shape) - 2).view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) + + +def _sk_cm_multidim_multiclass(preds, target, normalize=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() 
+ + return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) + + +@pytest.mark.parametrize("normalize", ["true", "pred", "all", None]) +@pytest.mark.parametrize( + "preds, target, sk_metric, num_classes, multilabel", + [ + (_input_binary_prob.preds, _input_binary_prob.target, _sk_cm_binary_prob, 2, False), + (_input_binary_logits.preds, _input_binary_logits.target, _sk_cm_binary_prob, 2, False), + (_input_binary.preds, _input_binary.target, _sk_cm_binary, 2, False), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_cm_multilabel_prob, NUM_CLASSES, True), + (_input_mlb_logits.preds, _input_mlb_logits.target, _sk_cm_multilabel_prob, NUM_CLASSES, True), + (_input_mlb.preds, _input_mlb.target, _sk_cm_multilabel, NUM_CLASSES, True), + (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_cm_multiclass_prob, NUM_CLASSES, False), + (_input_mcls_logits.preds, _input_mcls_logits.target, _sk_cm_multiclass_prob, NUM_CLASSES, False), + (_input_mcls.preds, _input_mcls.target, _sk_cm_multiclass, NUM_CLASSES, False), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_cm_multidim_multiclass_prob, NUM_CLASSES, False), + (_input_mdmc.preds, _input_mdmc.target, _sk_cm_multidim_multiclass, NUM_CLASSES, False), + ], +) +class TestConfusionMatrix(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_confusion_matrix( + self, normalize, preds, target, sk_metric, num_classes, multilabel, ddp, dist_sync_on_step + ): + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=ConfusionMatrix, + sk_metric=partial(sk_metric, normalize=normalize), + dist_sync_on_step=dist_sync_on_step, + metric_args={ + "num_classes": num_classes, + "threshold": THRESHOLD, + "normalize": normalize, + "multilabel": multilabel, + }, + ) + + def test_confusion_matrix_functional(self, normalize, preds, target, sk_metric, num_classes, multilabel): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=confusion_matrix, + sk_metric=partial(sk_metric, normalize=normalize), + metric_args={ + "num_classes": num_classes, + "threshold": THRESHOLD, + "normalize": normalize, + "multilabel": multilabel, + }, + ) + + def test_confusion_matrix_differentiability(self, normalize, preds, target, sk_metric, num_classes, multilabel): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=ConfusionMatrix, + metric_functional=confusion_matrix, + metric_args={ + "num_classes": num_classes, + "threshold": THRESHOLD, + "normalize": normalize, + "multilabel": multilabel, + }, + ) + + +def test_warning_on_nan(tmpdir): + preds = B.randint(3, size=(20,)) + target = B.randint(3, size=(20,)) + + with pytest.warns( + UserWarning, + match=".* nan values found in confusion matrix have been replaced with zeros.", + ): + confusion_matrix(preds, target, num_classes=5, normalize="true") diff --git a/RE/paddlemetric/src/tests/classification/test_f_beta.py b/RE/paddlemetric/src/tests/classification/test_f_beta.py new file mode 100644 index 00000000..741c0d46 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_f_beta.py @@ -0,0 +1,451 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from functools import partial +from typing import Callable, Optional + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import f1_score, fbeta_score +from paddleext.torchapi import Tensor + +from tests.classification.inputs import _input_binary, _input_binary_logits, _input_binary_prob +from tests.classification.inputs import _input_multiclass as _input_mcls +from tests.classification.inputs import _input_multiclass_logits as _input_mcls_logits +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multiclass_with_missing_class as _input_miss_class +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mlb +from tests.classification.inputs import _input_multilabel_logits as _input_mlb_logits +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_BATCHES, NUM_CLASSES, THRESHOLD, MetricTester +from paddlemetrics import F1, FBeta, Metric +from paddlemetrics.functional import f1, fbeta +from paddlemetrics.utilities.checks import _input_format_classification +from paddlemetrics.utilities.enums import AverageMethod + +seed_all(42) + + +def _sk_fbeta_f1(preds, target, sk_fn, num_classes, average, multiclass, ignore_index, mdmc_average=None): + if average == "none": + average = None + if num_classes == 1: + average = "binary" + + labels = list(range(num_classes)) + try: + labels.remove(ignore_index) + except ValueError: + pass + + sk_preds, sk_target, _ = _input_format_classification( + preds, target, THRESHOLD, num_classes=num_classes, multiclass=multiclass + ) + sk_preds, sk_target = sk_preds.numpy(), sk_target.numpy() + sk_scores = sk_fn(sk_target, sk_preds, average=average, zero_division=0, labels=labels) + + if len(labels) != num_classes and not average: + sk_scores = np.insert(sk_scores, ignore_index, np.nan) + + return sk_scores + + +def _sk_fbeta_f1_multidim_multiclass( + preds, target, sk_fn, num_classes, average, multiclass, ignore_index, mdmc_average +): + preds, target, _ = _input_format_classification( + preds, target, threshold=THRESHOLD, num_classes=num_classes, multiclass=multiclass + ) + + if mdmc_average == "global": + preds = B.transpose(preds, 1, 2).reshape(-1, preds.shape[1]) + target = B.transpose(target, 1, 2).reshape(-1, target.shape[1]) + + return _sk_fbeta_f1(preds, target, sk_fn, num_classes, average, False, ignore_index) + if mdmc_average == "samplewise": + scores = [] + + for i in range(preds.shape[0]): + pred_i = preds[i, ...].T + target_i = target[i, ...].T + scores_i = _sk_fbeta_f1(pred_i, target_i, sk_fn, num_classes, average, False, ignore_index) + + scores.append(np.expand_dims(scores_i, 0)) + + return np.concatenate(scores).mean(axis=0) + + +@pytest.mark.parametrize( + "metric_class, metric_fn", + [ + (partial(FBeta, beta=2.0), partial(fbeta, 
beta=2.0)), + (F1, f1), + ], +) +@pytest.mark.parametrize( + "average, mdmc_average, num_classes, ignore_index, match_str", + [ + ("wrong", None, None, None, "`average`"), + ("micro", "wrong", None, None, "`mdmc"), + ("macro", None, None, None, "number of classes"), + ("macro", None, 1, 0, "ignore_index"), + ], +) +def test_wrong_params(metric_class, metric_fn, average, mdmc_average, num_classes, ignore_index, match_str): + with pytest.raises(ValueError, match=match_str): + metric_class( + average=average, + mdmc_average=mdmc_average, + num_classes=num_classes, + ignore_index=ignore_index, + ) + + with pytest.raises(ValueError, match=match_str): + metric_fn( + _input_binary.preds[0], + _input_binary.target[0], + average=average, + mdmc_average=mdmc_average, + num_classes=num_classes, + ignore_index=ignore_index, + ) + + +@pytest.mark.parametrize( + "metric_class, metric_fn", + [ + (partial(FBeta, beta=2.0), partial(fbeta, beta=2.0)), + (F1, f1), + ], +) +def test_zero_division(metric_class, metric_fn): + """Test that zero_division works correctly (currently should just set to 0).""" + + preds = B.tensor([1, 2, 1, 1]) + target = B.tensor([2, 0, 2, 1]) + + cl_metric = metric_class(average="none", num_classes=3) + cl_metric(preds, target) + + result_cl = cl_metric.compute() + result_fn = metric_fn(preds, target, average="none", num_classes=3) + + assert result_cl[0] == result_fn[0] == 0 + + +@pytest.mark.parametrize( + "metric_class, metric_fn", + [ + (partial(FBeta, beta=2.0), partial(fbeta, beta=2.0)), + (F1, f1), + ], +) +def test_no_support(metric_class, metric_fn): + """This tests a rare edge case, where there is only one class present. + + in target, and ignore_index is set to exactly that class - and the + average method is equal to 'weighted'. + + This would mean that the sum of weights equals zero, and would, without + taking care of this case, return NaN. However, the reduction function + should catch that and set the metric to equal the value of zero_division + in this case (zero_division is for now not configurable and equals 0). 
+ """ + + preds = B.tensor([1, 1, 0, 0]) + target = B.tensor([0, 0, 0, 0]) + + cl_metric = metric_class(average="weighted", num_classes=2, ignore_index=0) + cl_metric(preds, target) + + result_cl = cl_metric.compute() + result_fn = metric_fn(preds, target, average="weighted", num_classes=2, ignore_index=0) + + assert result_cl == result_fn == 0 + + +@pytest.mark.parametrize("metric_class, metric_fn", [(partial(FBeta, beta=2.0), partial(fbeta, beta=2.0)), (F1, f1)]) +@pytest.mark.parametrize( + "ignore_index, expected", [(None, B.tensor([1.0, np.nan])), (0, B.tensor([np.nan, np.nan]))] +) +def test_class_not_present(metric_class, metric_fn, ignore_index, expected): + """This tests that when metric is computed per class and a given class is not present in both the `preds` and + `target`, the resulting score is `nan`.""" + preds = B.tensor([0, 0, 0]) + target = B.tensor([0, 0, 0]) + num_classes = 2 + + # test functional + result_fn = metric_fn(preds, target, average=AverageMethod.NONE, num_classes=num_classes, ignore_index=ignore_index) + assert B.allclose(expected, result_fn, equal_nan=True) + + # test class + cl_metric = metric_class(average=AverageMethod.NONE, num_classes=num_classes, ignore_index=ignore_index) + cl_metric(preds, target) + result_cl = cl_metric.compute() + assert B.allclose(expected, result_cl, equal_nan=True) + + +@pytest.mark.parametrize( + "metric_class, metric_fn, sk_fn", + [(partial(FBeta, beta=2.0), partial(fbeta, beta=2.0), partial(fbeta_score, beta=2.0)), (F1, f1, f1_score)], +) +@pytest.mark.parametrize("average", ["micro", "macro", None, "weighted", "samples"]) +@pytest.mark.parametrize("ignore_index", [None, 0]) +@pytest.mark.parametrize( + "preds, target, num_classes, multiclass, mdmc_average, sk_wrapper", + [ + (_input_binary_logits.preds, _input_binary_logits.target, 1, None, None, _sk_fbeta_f1), + (_input_binary_prob.preds, _input_binary_prob.target, 1, None, None, _sk_fbeta_f1), + (_input_binary.preds, _input_binary.target, 1, False, None, _sk_fbeta_f1), + (_input_mlb_logits.preds, _input_mlb_logits.target, NUM_CLASSES, None, None, _sk_fbeta_f1), + (_input_mlb_prob.preds, _input_mlb_prob.target, NUM_CLASSES, None, None, _sk_fbeta_f1), + (_input_mlb.preds, _input_mlb.target, NUM_CLASSES, False, None, _sk_fbeta_f1), + (_input_mcls_logits.preds, _input_mcls_logits.target, NUM_CLASSES, None, None, _sk_fbeta_f1), + (_input_mcls_prob.preds, _input_mcls_prob.target, NUM_CLASSES, None, None, _sk_fbeta_f1), + (_input_mcls.preds, _input_mcls.target, NUM_CLASSES, None, None, _sk_fbeta_f1), + (_input_mdmc.preds, _input_mdmc.target, NUM_CLASSES, None, "global", _sk_fbeta_f1_multidim_multiclass), + ( + _input_mdmc_prob.preds, + _input_mdmc_prob.target, + NUM_CLASSES, + None, + "global", + _sk_fbeta_f1_multidim_multiclass, + ), + (_input_mdmc.preds, _input_mdmc.target, NUM_CLASSES, None, "samplewise", _sk_fbeta_f1_multidim_multiclass), + ( + _input_mdmc_prob.preds, + _input_mdmc_prob.target, + NUM_CLASSES, + None, + "samplewise", + _sk_fbeta_f1_multidim_multiclass, + ), + ], +) +class TestFBeta(MetricTester): + @pytest.mark.parametrize("ddp", [False]) + @pytest.mark.parametrize("dist_sync_on_step", [False]) + def test_fbeta_f1( + self, + ddp: bool, + dist_sync_on_step: bool, + preds: Tensor, + target: Tensor, + sk_wrapper: Callable, + metric_class: Metric, + metric_fn: Callable, + sk_fn: Callable, + multiclass: Optional[bool], + num_classes: Optional[int], + average: str, + mdmc_average: Optional[str], + ignore_index: Optional[int], + ): + if num_classes == 1 and average 
!= "micro": + pytest.skip("Only test binary data for 'micro' avg (equivalent of 'binary' in sklearn)") + + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + if average == "weighted" and ignore_index is not None and mdmc_average is not None: + pytest.skip("Ignore special case where we are ignoring entire sample for 'weighted' average") + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=metric_class, + sk_metric=partial( + sk_wrapper, + sk_fn=sk_fn, + average=average, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + mdmc_average=mdmc_average, + ), + dist_sync_on_step=dist_sync_on_step, + metric_args={ + "num_classes": num_classes, + "average": average, + "threshold": THRESHOLD, + "multiclass": multiclass, + "ignore_index": ignore_index, + "mdmc_average": mdmc_average, + }, + check_dist_sync_on_step=True, + check_batch=True, + ) + + def test_fbeta_f1_functional( + self, + preds: Tensor, + target: Tensor, + sk_wrapper: Callable, + metric_class: Metric, + metric_fn: Callable, + sk_fn: Callable, + multiclass: Optional[bool], + num_classes: Optional[int], + average: str, + mdmc_average: Optional[str], + ignore_index: Optional[int], + ): + if num_classes == 1 and average != "micro": + pytest.skip("Only test binary data for 'micro' avg (equivalent of 'binary' in sklearn)") + + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + if average == "weighted" and ignore_index is not None and mdmc_average is not None: + pytest.skip("Ignore special case where we are ignoring entire sample for 'weighted' average") + + self.run_functional_metric_test( + preds, + target, + metric_functional=metric_fn, + sk_metric=partial( + sk_wrapper, + sk_fn=sk_fn, + average=average, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + mdmc_average=mdmc_average, + ), + metric_args={ + "num_classes": num_classes, + "average": average, + "threshold": THRESHOLD, + "multiclass": multiclass, + "ignore_index": ignore_index, + "mdmc_average": mdmc_average, + }, + ) + + def test_fbeta_f1_differentiability( + self, + preds: Tensor, + target: Tensor, + sk_wrapper: Callable, + metric_class: Metric, + metric_fn: Callable, + sk_fn: Callable, + multiclass: Optional[bool], + num_classes: Optional[int], + average: str, + mdmc_average: Optional[str], + ignore_index: Optional[int], + ): + if num_classes == 1 and average != "micro": + pytest.skip("Only test binary data for 'micro' avg (equivalent of 'binary' in sklearn)") + + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + if average == "weighted" and ignore_index is not None and mdmc_average is not None: + pytest.skip("Ignore special case where we are ignoring entire sample for 'weighted' average") + + self.run_differentiability_test( + preds, + target, + metric_functional=metric_fn, + metric_module=metric_class, + metric_args={ + "num_classes": num_classes, + "average": average, + "threshold": THRESHOLD, + "multiclass": multiclass, + "ignore_index": ignore_index, + "mdmc_average": mdmc_average, + }, + ) + + +_mc_k_target = B.tensor([0, 1, 2]) +_mc_k_preds = B.tensor([[0.35, 0.4, 0.25], [0.1, 0.5, 0.4], [0.2, 0.1, 0.7]]) +_ml_k_target = B.tensor([[0, 1, 0], [1, 1, 0], [0, 0, 0]]) +_ml_k_preds = B.tensor([[0.9, 0.2, 0.75], [0.1, 0.7, 0.8], [0.6, 0.1, 0.7]]) + + +@pytest.mark.parametrize( + 
"metric_class, metric_fn", + [ + (partial(FBeta, beta=2.0), partial(fbeta, beta=2.0)), + (F1, fbeta), + ], +) +@pytest.mark.parametrize( + "k, preds, target, average, expected_fbeta, expected_f1", + [ + (1, _mc_k_preds, _mc_k_target, "micro", B.tensor(2 / 3), B.tensor(2 / 3)), + (2, _mc_k_preds, _mc_k_target, "micro", B.tensor(5 / 6), B.tensor(2 / 3)), + (1, _ml_k_preds, _ml_k_target, "micro", B.tensor(0.0), B.tensor(0.0)), + (2, _ml_k_preds, _ml_k_target, "micro", B.tensor(5 / 18), B.tensor(2 / 9)), + ], +) +def test_top_k( + metric_class, + metric_fn, + k: int, + preds: Tensor, + target: Tensor, + average: str, + expected_fbeta: Tensor, + expected_f1: Tensor, +): + """A simple test to check that top_k works as expected. + + Just a sanity check, the tests in StatScores should already guarantee the corectness of results. + """ + class_metric = metric_class(top_k=k, average=average, num_classes=3) + class_metric.update(preds, target) + + if class_metric.beta != 1.0: + result = expected_fbeta + else: + result = expected_f1 + + assert B.isclose(class_metric.compute(), result) + assert B.isclose(metric_fn(preds, target, top_k=k, average=average, num_classes=3), result) + + +@pytest.mark.parametrize("ignore_index", [None, 2]) +@pytest.mark.parametrize("average", ["micro", "macro", "weighted"]) +@pytest.mark.parametrize( + "metric_class, metric_functional, sk_fn", + [(partial(FBeta, beta=2.0), partial(fbeta, beta=2.0), partial(fbeta_score, beta=2.0)), (F1, f1, f1_score)], +) +def test_same_input(metric_class, metric_functional, sk_fn, average, ignore_index): + preds = _input_miss_class.preds + target = _input_miss_class.target + preds_flat = B.cat(list(preds), dim=0) + target_flat = B.cat(list(target), dim=0) + + mc = metric_class(num_classes=NUM_CLASSES, average=average, ignore_index=ignore_index) + for i in range(NUM_BATCHES): + mc.update(preds[i], target[i]) + class_res = mc.compute() + func_res = metric_functional( + preds_flat, target_flat, num_classes=NUM_CLASSES, average=average, ignore_index=ignore_index + ) + sk_res = sk_fn(target_flat, preds_flat, average=average, zero_division=0) + + assert B.allclose(class_res, B.tensor(sk_res).float()) + assert B.allclose(func_res, B.tensor(sk_res).float()) diff --git a/RE/paddlemetric/src/tests/classification/test_hamming_distance.py b/RE/paddlemetric/src/tests/classification/test_hamming_distance.py new file mode 100644 index 00000000..a1ca480b --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_hamming_distance.py @@ -0,0 +1,106 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import pytest +from sklearn.metrics import hamming_loss as sk_hamming_loss + +from tests.classification.inputs import _input_binary, _input_binary_logits, _input_binary_prob +from tests.classification.inputs import _input_multiclass as _input_mcls +from tests.classification.inputs import _input_multiclass_logits as _input_mcls_logits +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mlb +from tests.classification.inputs import _input_multilabel_logits as _input_mlb_logits +from tests.classification.inputs import _input_multilabel_multidim as _input_mlmd +from tests.classification.inputs import _input_multilabel_multidim_prob as _input_mlmd_prob +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import THRESHOLD, MetricTester +from paddlemetrics import HammingDistance +from paddlemetrics.functional import hamming_distance +from paddlemetrics.utilities.checks import _input_format_classification + +seed_all(42) + + +def _sk_hamming_loss(preds, target): + sk_preds, sk_target, _ = _input_format_classification(preds, target, threshold=THRESHOLD) + sk_preds, sk_target = sk_preds.numpy(), sk_target.numpy() + sk_preds, sk_target = sk_preds.reshape(sk_preds.shape[0], -1), sk_target.reshape(sk_target.shape[0], -1) + + return sk_hamming_loss(y_true=sk_target, y_pred=sk_preds) + + +@pytest.mark.parametrize( + "preds, target", + [ + (_input_binary_logits.preds, _input_binary_logits.target), + (_input_binary_prob.preds, _input_binary_prob.target), + (_input_binary.preds, _input_binary.target), + (_input_mlb_logits.preds, _input_mlb_logits.target), + (_input_mlb_prob.preds, _input_mlb_prob.target), + (_input_mlb.preds, _input_mlb.target), + (_input_mcls_logits.preds, _input_mcls_logits.target), + (_input_mcls_prob.preds, _input_mcls_prob.target), + (_input_mcls.preds, _input_mcls.target), + (_input_mdmc_prob.preds, _input_mdmc_prob.target), + (_input_mdmc.preds, _input_mdmc.target), + (_input_mlmd_prob.preds, _input_mlmd_prob.target), + (_input_mlmd.preds, _input_mlmd.target), + ], +) +class TestHammingDistance(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [False, True]) + def test_hamming_distance_class(self, ddp, dist_sync_on_step, preds, target): + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=HammingDistance, + sk_metric=_sk_hamming_loss, + dist_sync_on_step=dist_sync_on_step, + metric_args={"threshold": THRESHOLD}, + ) + + def test_hamming_distance_fn(self, preds, target): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=hamming_distance, + sk_metric=_sk_hamming_loss, + metric_args={"threshold": THRESHOLD}, + ) + + def test_hamming_distance_differentiability(self, preds, target): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=HammingDistance, + metric_functional=hamming_distance, + metric_args={"threshold": THRESHOLD}, + ) + + +@pytest.mark.parametrize("threshold", [1.5]) +def test_wrong_params(threshold): + preds, target = _input_mcls_prob.preds, _input_mcls_prob.target + + with pytest.raises(ValueError): + ham_dist = HammingDistance(threshold=threshold) 
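+ # a threshold outside the [0, 1] range (here 1.5) is expected to raise a ValueError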
+ ham_dist(preds, target) + ham_dist.compute() + + with pytest.raises(ValueError): + hamming_distance(preds, target, threshold=threshold) diff --git a/RE/paddlemetric/src/tests/classification/test_hinge.py b/RE/paddlemetric/src/tests/classification/test_hinge.py new file mode 100644 index 00000000..7adbbb78 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_hinge.py @@ -0,0 +1,156 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import hinge_loss as sk_hinge +from sklearn.preprocessing import OneHotEncoder + +from tests.classification.inputs import Input +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, NUM_CLASSES, MetricTester +from paddlemetrics import Hinge +from paddlemetrics.functional import hinge +from paddlemetrics.functional.classification.hinge import MulticlassMode + +B.manual_seed(42) + +_input_binary = Input( + preds=B.randn(NUM_BATCHES, BATCH_SIZE), target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE)) +) + +_input_binary_single = Input(preds=B.randn((NUM_BATCHES, 1)), target=B.randint(high=2, size=(NUM_BATCHES, 1))) + +_input_multiclass = Input( + preds=B.randn(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES), + target=B.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), +) + + +def _sk_hinge(preds, target, squared, multiclass_mode): + sk_preds, sk_target = preds.numpy(), target.numpy() + + if multiclass_mode == MulticlassMode.ONE_VS_ALL: + enc = OneHotEncoder() + enc.fit(sk_target.reshape(-1, 1)) + sk_target = enc.transform(sk_target.reshape(-1, 1)).toarray() + + if sk_preds.ndim == 1 or multiclass_mode == MulticlassMode.ONE_VS_ALL: + sk_target = 2 * sk_target - 1 + + if squared or sk_target.max() != 1 or sk_target.min() != -1: + # Squared not an option in sklearn and infers classes incorrectly with single element, so adapted from source + if sk_preds.ndim == 1 or multiclass_mode == MulticlassMode.ONE_VS_ALL: + margin = sk_target * sk_preds + else: + mask = np.ones_like(sk_preds, dtype=bool) + mask[np.arange(sk_target.shape[0]), sk_target] = False + margin = sk_preds[~mask] + margin -= np.max(sk_preds[mask].reshape(sk_target.shape[0], -1), axis=1) + measures = 1 - margin + measures = np.clip(measures, 0, None) + + if squared: + measures = measures ** 2 + return measures.mean(axis=0) + if multiclass_mode == MulticlassMode.ONE_VS_ALL: + result = np.zeros(sk_preds.shape[1]) + for i in range(result.shape[0]): + result[i] = sk_hinge(y_true=sk_target[:, i], pred_decision=sk_preds[:, i]) + return result + + return sk_hinge(y_true=sk_target, pred_decision=sk_preds) + + +@pytest.mark.parametrize( + "preds, target, squared, multiclass_mode", + [ + (_input_binary.preds, _input_binary.target, False, None), + (_input_binary.preds, _input_binary.target, True, None), + (_input_binary_single.preds, _input_binary_single.target, False, None), + (_input_binary_single.preds, _input_binary_single.target, True, 
None), + (_input_multiclass.preds, _input_multiclass.target, False, MulticlassMode.CRAMMER_SINGER), + (_input_multiclass.preds, _input_multiclass.target, True, MulticlassMode.CRAMMER_SINGER), + (_input_multiclass.preds, _input_multiclass.target, False, MulticlassMode.ONE_VS_ALL), + (_input_multiclass.preds, _input_multiclass.target, True, MulticlassMode.ONE_VS_ALL), + ], +) +class TestHinge(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_hinge_class(self, ddp, dist_sync_on_step, preds, target, squared, multiclass_mode): + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=Hinge, + sk_metric=partial(_sk_hinge, squared=squared, multiclass_mode=multiclass_mode), + dist_sync_on_step=dist_sync_on_step, + metric_args={ + "squared": squared, + "multiclass_mode": multiclass_mode, + }, + ) + + def test_hinge_fn(self, preds, target, squared, multiclass_mode): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=partial(hinge, squared=squared, multiclass_mode=multiclass_mode), + sk_metric=partial(_sk_hinge, squared=squared, multiclass_mode=multiclass_mode), + ) + + def test_hinge_differentiability(self, preds, target, squared, multiclass_mode): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=Hinge, + metric_functional=partial(hinge, squared=squared, multiclass_mode=multiclass_mode), + ) + + +_input_multi_target = Input(preds=B.randn(BATCH_SIZE), target=B.randint(high=2, size=(BATCH_SIZE, 2))) + +_input_binary_different_sizes = Input( + preds=B.randn(BATCH_SIZE * 2), target=B.randint(high=2, size=(BATCH_SIZE,)) +) + +_input_multi_different_sizes = Input( + preds=B.randn(BATCH_SIZE * 2, NUM_CLASSES), target=B.randint(high=NUM_CLASSES, size=(BATCH_SIZE,)) +) + +_input_extra_dim = Input( + preds=B.randn(BATCH_SIZE, NUM_CLASSES, 2), target=B.randint(high=2, size=(BATCH_SIZE,)) +) + + +@pytest.mark.parametrize( + "preds, target, multiclass_mode", + [ + (_input_multi_target.preds, _input_multi_target.target, None), + (_input_binary_different_sizes.preds, _input_binary_different_sizes.target, None), + (_input_multi_different_sizes.preds, _input_multi_different_sizes.target, None), + (_input_extra_dim.preds, _input_extra_dim.target, None), + (_input_multiclass.preds[0], _input_multiclass.target[0], "invalid_mode"), + ], +) +def test_bad_inputs_fn(preds, target, multiclass_mode): + with pytest.raises(ValueError): + _ = hinge(preds, target, multiclass_mode=multiclass_mode) + + +def test_bad_inputs_class(): + with pytest.raises(ValueError): + Hinge(multiclass_mode="invalid_mode") diff --git a/RE/paddlemetric/src/tests/classification/test_inputs.py b/RE/paddlemetric/src/tests/classification/test_inputs.py new file mode 100644 index 00000000..4f924af2 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_inputs.py @@ -0,0 +1,312 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import pytest +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, rand, randint, tensor + +from tests.classification.inputs import Input +from tests.classification.inputs import _input_binary as _bin +from tests.classification.inputs import _input_binary_prob as _bin_prob +from tests.classification.inputs import _input_multiclass as _mc +from tests.classification.inputs import _input_multiclass_prob as _mc_prob +from tests.classification.inputs import _input_multidim_multiclass as _mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _mdmc_prob +from tests.classification.inputs import _input_multilabel as _ml +from tests.classification.inputs import _input_multilabel_multidim as _mlmd +from tests.classification.inputs import _input_multilabel_multidim_prob as _mlmd_prob +from tests.classification.inputs import _input_multilabel_prob as _ml_prob +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, EXTRA_DIM, NUM_BATCHES, NUM_CLASSES, THRESHOLD +from paddlemetrics.utilities.checks import _input_format_classification +from paddlemetrics.utilities.data import select_topk, to_onehot +from paddlemetrics.utilities.enums import DataType + +seed_all(42) + +# Some additional inputs to test on +_ml_prob_half = Input(_ml_prob.preds.half(), _ml_prob.target) + +_mc_prob_2cls_preds = rand(NUM_BATCHES, BATCH_SIZE, 2) +_mc_prob_2cls_preds /= _mc_prob_2cls_preds.sum(dim=2, keepdim=True) +_mc_prob_2cls = Input(_mc_prob_2cls_preds, randint(high=2, size=(NUM_BATCHES, BATCH_SIZE))) + +_mdmc_prob_many_dims_preds = rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, EXTRA_DIM, EXTRA_DIM) +_mdmc_prob_many_dims_preds /= _mdmc_prob_many_dims_preds.sum(dim=2, keepdim=True) +_mdmc_prob_many_dims = Input( + _mdmc_prob_many_dims_preds, + randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM, EXTRA_DIM)), +) + +_mdmc_prob_2cls_preds = rand(NUM_BATCHES, BATCH_SIZE, 2, EXTRA_DIM) +_mdmc_prob_2cls_preds /= _mdmc_prob_2cls_preds.sum(dim=2, keepdim=True) +_mdmc_prob_2cls = Input(_mdmc_prob_2cls_preds, randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM))) + +# Some utils +T = Tensor + + +def _idn(x): + return x + + +def _usq(x): + return x.unsqueeze(-1) + + +def _thrs(x): + return x >= THRESHOLD + + +def _rshp1(x): + return x.reshape(x.shape[0], -1) + + +def _rshp2(x): + return x.reshape(x.shape[0], x.shape[1], -1) + + +def _onehot(x): + return to_onehot(x, NUM_CLASSES) + + +def _onehot2(x): + return to_onehot(x, 2) + + +def _top1(x): + return select_topk(x, 1) + + +def _top2(x): + return select_topk(x, 2) + + +# To avoid ugly black line wrapping +def _ml_preds_tr(x): + return _rshp1(_thrs(x)) + + +def _onehot_rshp1(x): + return _onehot(_rshp1(x)) + + +def _onehot2_rshp1(x): + return _onehot2(_rshp1(x)) + + +def _top1_rshp2(x): + return _top1(_rshp2(x)) + + +def _top2_rshp2(x): + return _top2(_rshp2(x)) + + +def _probs_to_mc_preds_tr(x): + return _onehot2(_thrs(x)) + + +def _mlmd_prob_to_mc_preds_tr(x): + return _onehot2(_rshp1(_thrs(x))) + + +######################## +# Test correct inputs +######################## + + +@pytest.mark.parametrize( + "inputs, num_classes, multiclass, top_k, exp_mode, post_preds, post_target", + [ + ############################# + # Test usual expected cases + (_bin, None, False, None, "multi-class", _usq, _usq), + (_bin, 1, False, None, "multi-class", _usq, _usq), + (_bin_prob, None, None, None, "binary", lambda x: 
_usq(_thrs(x)), _usq), + (_ml_prob, None, None, None, "multi-label", _thrs, _idn), + (_ml, None, False, None, "multi-dim multi-class", _idn, _idn), + (_ml_prob, None, None, None, "multi-label", _ml_preds_tr, _rshp1), + (_ml_prob, None, None, 2, "multi-label", _top2, _rshp1), + (_mlmd, None, False, None, "multi-dim multi-class", _rshp1, _rshp1), + (_mc, NUM_CLASSES, None, None, "multi-class", _onehot, _onehot), + (_mc_prob, None, None, None, "multi-class", _top1, _onehot), + (_mc_prob, None, None, 2, "multi-class", _top2, _onehot), + (_mdmc, NUM_CLASSES, None, None, "multi-dim multi-class", _onehot, _onehot), + (_mdmc_prob, None, None, None, "multi-dim multi-class", _top1_rshp2, _onehot), + (_mdmc_prob, None, None, 2, "multi-dim multi-class", _top2_rshp2, _onehot), + (_mdmc_prob_many_dims, None, None, None, "multi-dim multi-class", _top1_rshp2, _onehot_rshp1), + (_mdmc_prob_many_dims, None, None, 2, "multi-dim multi-class", _top2_rshp2, _onehot_rshp1), + ########################### + # Test some special cases + # Make sure that half precision works, i.e. is converted to full precision + (_ml_prob_half, None, None, None, "multi-label", lambda x: _ml_preds_tr(x.float()), _rshp1), + # Binary as multiclass + (_bin, None, None, None, "multi-class", _onehot2, _onehot2), + # Binary probs as multiclass + (_bin_prob, None, True, None, "binary", _probs_to_mc_preds_tr, _onehot2), + # Multilabel as multiclass + (_ml, None, True, None, "multi-dim multi-class", _onehot2, _onehot2), + # Multilabel probs as multiclass + (_ml_prob, None, True, None, "multi-label", _probs_to_mc_preds_tr, _onehot2), + # Multidim multilabel as multiclass + (_mlmd, None, True, None, "multi-dim multi-class", _onehot2_rshp1, _onehot2_rshp1), + # Multidim multilabel probs as multiclass + (_mlmd_prob, None, True, None, "multi-label", _mlmd_prob_to_mc_preds_tr, _onehot2_rshp1), + # Multiclass prob with 2 classes as binary + (_mc_prob_2cls, None, False, None, "multi-class", lambda x: _top1(x)[:, [1]], _usq), + # Multi-dim multi-class with 2 classes as multi-label + (_mdmc_prob_2cls, None, False, None, "multi-dim multi-class", lambda x: _top1(x)[:, 1], _idn), + ], +) +def test_usual_cases(inputs, num_classes, multiclass, top_k, exp_mode, post_preds, post_target): + def __get_data_type_enum(str_exp_mode): + return next(DataType[n] for n in dir(DataType) if DataType[n] == str_exp_mode) + + for exp_mode in (exp_mode, __get_data_type_enum(exp_mode)): + preds_out, target_out, mode = _input_format_classification( + preds=inputs.preds[0], + target=inputs.target[0], + threshold=THRESHOLD, + num_classes=num_classes, + multiclass=multiclass, + top_k=top_k, + ) + + assert mode == exp_mode + assert B.equal(preds_out, post_preds(inputs.preds[0]).int()) + assert B.equal(target_out, post_target(inputs.target[0]).int()) + + # Test that things work when batch_size = 1 + preds_out, target_out, mode = _input_format_classification( + preds=inputs.preds[0][[0], ...], + target=inputs.target[0][[0], ...], + threshold=THRESHOLD, + num_classes=num_classes, + multiclass=multiclass, + top_k=top_k, + ) + + assert mode == exp_mode + assert B.equal(preds_out, post_preds(inputs.preds[0][[0], ...]).int()) + assert B.equal(target_out, post_target(inputs.target[0][[0], ...]).int()) + + +# Test that threshold is correctly applied +def test_threshold(): + target = T([1, 1, 1]).int() + preds_probs = T([0.5 - 1e-5, 0.5, 0.5 + 1e-5]) + + preds_probs_out, _, _ = _input_format_classification(preds_probs, target, threshold=0.5) + + assert B.equal(tensor([0, 1, 1], 
dtype=B.int), preds_probs_out.squeeze().int()) + + +######################################################################## +# Test incorrect inputs +######################################################################## + + +@pytest.mark.parametrize( + "preds, target, num_classes, multiclass", + [ + # Target not integer + (randint(high=2, size=(7,)), randint(high=2, size=(7,)).float(), None, None), + # Target negative + (randint(high=2, size=(7,)), -randint(high=2, size=(7,)), None, None), + # Preds negative integers + (-randint(high=2, size=(7,)), randint(high=2, size=(7,)), None, None), + # multiclass=False and target > 1 + (rand(size=(7,)), randint(low=2, high=4, size=(7,)), None, False), + # multiclass=False and preds integers with > 1 + (randint(low=2, high=4, size=(7,)), randint(high=2, size=(7,)), None, False), + # Wrong batch size + (randint(high=2, size=(8,)), randint(high=2, size=(7,)), None, None), + # Completely wrong shape + (randint(high=2, size=(7,)), randint(high=2, size=(7, 4)), None, None), + # Same #dims, different shape + (randint(high=2, size=(7, 3)), randint(high=2, size=(7, 4)), None, None), + # Same shape and preds floats, target not binary + (rand(size=(7, 3)), randint(low=2, high=4, size=(7, 3)), None, None), + # #dims in preds = 1 + #dims in target, C shape not second or last + (rand(size=(7, 3, 4, 3)), randint(high=4, size=(7, 3, 3)), None, None), + # #dims in preds = 1 + #dims in target, preds not float + (randint(high=2, size=(7, 3, 3, 4)), randint(high=4, size=(7, 3, 3)), None, None), + # multiclass=False, with C dimension > 2 + (_mc_prob.preds[0], randint(high=2, size=(BATCH_SIZE,)), None, False), + # Max target larger or equal to C dimension + (_mc_prob.preds[0], randint(low=NUM_CLASSES + 1, high=100, size=(BATCH_SIZE,)), None, None), + # C dimension not equal to num_classes + (_mc_prob.preds[0], _mc_prob.target[0], NUM_CLASSES + 1, None), + # Max target larger than num_classes (with #dim preds = 1 + #dims target) + (_mc_prob.preds[0], randint(low=NUM_CLASSES + 1, high=100, size=(BATCH_SIZE, NUM_CLASSES)), 4, None), + # Max target larger than num_classes (with #dim preds = #dims target) + (randint(high=4, size=(7, 3)), randint(low=5, high=7, size=(7, 3)), 4, None), + # Num_classes=1, but multiclass not false + (randint(high=2, size=(7,)), randint(high=2, size=(7,)), 1, None), + # multiclass=False, but implied class dimension (for multi-label, from shape) != num_classes + (randint(high=2, size=(7, 3, 3)), randint(high=2, size=(7, 3, 3)), 4, False), + # Multilabel input with implied class dimension != num_classes + (rand(size=(7, 3, 3)), randint(high=2, size=(7, 3, 3)), 4, False), + # Multilabel input with multiclass=True, but num_classes != 2 (or None) + (rand(size=(7, 3)), randint(high=2, size=(7, 3)), 4, True), + # Binary input, num_classes > 2 + (rand(size=(7,)), randint(high=2, size=(7,)), 4, None), + # Binary input, num_classes == 2 and multiclass not True + (rand(size=(7,)), randint(high=2, size=(7,)), 2, None), + (rand(size=(7,)), randint(high=2, size=(7,)), 2, False), + # Binary input, num_classes == 1 and multiclass=True + (rand(size=(7,)), randint(high=2, size=(7,)), 1, True), + ], +) +def test_incorrect_inputs(preds, target, num_classes, multiclass): + with pytest.raises(ValueError): + _input_format_classification( + preds=preds, target=target, threshold=THRESHOLD, num_classes=num_classes, multiclass=multiclass + ) + + +@pytest.mark.parametrize( + "preds, target, num_classes, multiclass, top_k", + [ + # Topk set with non (md)mc or ml prob data 
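+        # (labels or binary probabilities, for which a top_k selection is not defined)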
+ (_bin.preds[0], _bin.target[0], None, None, 2), + (_bin_prob.preds[0], _bin_prob.target[0], None, None, 2), + (_mc.preds[0], _mc.target[0], None, None, 2), + (_ml.preds[0], _ml.target[0], None, None, 2), + (_mlmd.preds[0], _mlmd.target[0], None, None, 2), + (_mdmc.preds[0], _mdmc.target[0], None, None, 2), + # top_k = 0 + (_mc_prob_2cls.preds[0], _mc_prob_2cls.target[0], None, None, 0), + # top_k = float + (_mc_prob_2cls.preds[0], _mc_prob_2cls.target[0], None, None, 0.123), + # top_k =2 with 2 classes, multiclass=False + (_mc_prob_2cls.preds[0], _mc_prob_2cls.target[0], None, False, 2), + # top_k = number of classes (C dimension) + (_mc_prob.preds[0], _mc_prob.target[0], None, None, NUM_CLASSES), + # multiclass = True for ml prob inputs, top_k set + (_ml_prob.preds[0], _ml_prob.target[0], None, True, 2), + # top_k = num_classes for ml prob inputs + (_ml_prob.preds[0], _ml_prob.target[0], None, True, NUM_CLASSES), + ], +) +def test_incorrect_inputs_topk(preds, target, num_classes, multiclass, top_k): + with pytest.raises(ValueError): + _input_format_classification( + preds=preds, + target=target, + threshold=THRESHOLD, + num_classes=num_classes, + multiclass=multiclass, + top_k=top_k, + ) diff --git a/RE/paddlemetric/src/tests/classification/test_iou.py b/RE/paddlemetric/src/tests/classification/test_iou.py new file mode 100644 index 00000000..af22d787 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_iou.py @@ -0,0 +1,235 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
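+# IoU (intersection over union) is the Jaccard index, so sklearn's jaccard_score is used as the reference below.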
+from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import jaccard_score as sk_jaccard_score +from paddleext.torchapi import Tensor, tensor + +from tests.classification.inputs import _input_binary, _input_binary_prob +from tests.classification.inputs import _input_multiclass as _input_mcls +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mlb +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers.testers import NUM_CLASSES, THRESHOLD, MetricTester +from paddlemetrics.classification.iou import IoU +from paddlemetrics.functional import iou + + +def _sk_iou_binary_prob(preds, target, average=None): + sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) + sk_target = target.view(-1).numpy() + + return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) + + +def _sk_iou_binary(preds, target, average=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) + + +def _sk_iou_multilabel_prob(preds, target, average=None): + sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) + sk_target = target.view(-1).numpy() + + return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) + + +def _sk_iou_multilabel(preds, target, average=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) + + +def _sk_iou_multiclass_prob(preds, target, average=None): + sk_preds = B.argmax(preds, dim=len(preds.shape) - 1).view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) + + +def _sk_iou_multiclass(preds, target, average=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) + + +def _sk_iou_multidim_multiclass_prob(preds, target, average=None): + sk_preds = B.argmax(preds, dim=len(preds.shape) - 2).view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) + + +def _sk_iou_multidim_multiclass(preds, target, average=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) + + +@pytest.mark.parametrize("reduction", ["elementwise_mean", "none"]) +@pytest.mark.parametrize( + "preds, target, sk_metric, num_classes", + [ + (_input_binary_prob.preds, _input_binary_prob.target, _sk_iou_binary_prob, 2), + (_input_binary.preds, _input_binary.target, _sk_iou_binary, 2), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_iou_multilabel_prob, 2), + (_input_mlb.preds, _input_mlb.target, _sk_iou_multilabel, 2), + (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_iou_multiclass_prob, NUM_CLASSES), + (_input_mcls.preds, _input_mcls.target, _sk_iou_multiclass, NUM_CLASSES), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_iou_multidim_multiclass_prob, NUM_CLASSES), + (_input_mdmc.preds, _input_mdmc.target, 
_sk_iou_multidim_multiclass, NUM_CLASSES), + ], +) +class TestIoU(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_iou(self, reduction, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step): + average = "macro" if reduction == "elementwise_mean" else None # convert tags + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=IoU, + sk_metric=partial(sk_metric, average=average), + dist_sync_on_step=dist_sync_on_step, + metric_args={"num_classes": num_classes, "threshold": THRESHOLD, "reduction": reduction}, + ) + + def test_iou_functional(self, reduction, preds, target, sk_metric, num_classes): + average = "macro" if reduction == "elementwise_mean" else None # convert tags + self.run_functional_metric_test( + preds, + target, + metric_functional=iou, + sk_metric=partial(sk_metric, average=average), + metric_args={"num_classes": num_classes, "threshold": THRESHOLD, "reduction": reduction}, + ) + + def test_iou_differentiability(self, reduction, preds, target, sk_metric, num_classes): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=IoU, + metric_functional=iou, + metric_args={"num_classes": num_classes, "threshold": THRESHOLD, "reduction": reduction}, + ) + + +@pytest.mark.parametrize( + ["half_ones", "reduction", "ignore_index", "expected"], + [ + pytest.param(False, "none", None, Tensor([1, 1, 1])), + pytest.param(False, "elementwise_mean", None, Tensor([1])), + pytest.param(False, "none", 0, Tensor([1, 1])), + pytest.param(True, "none", None, Tensor([0.5, 0.5, 0.5])), + pytest.param(True, "elementwise_mean", None, Tensor([0.5])), + pytest.param(True, "none", 0, Tensor([2 / 3, 1 / 2])), + ], +) +def test_iou(half_ones, reduction, ignore_index, expected): + preds = (B.arange(120) % 3).view(-1, 1) + target = (B.arange(120) % 3).view(-1, 1) + if half_ones: + preds[:60] = 1 + iou_val = iou( + preds=preds, + target=target, + ignore_index=ignore_index, + reduction=reduction, + ) + assert B.allclose(iou_val, expected, atol=1e-9) + + +# test `absent_score` +@pytest.mark.parametrize( + ["pred", "target", "ignore_index", "absent_score", "num_classes", "expected"], + [ + # Note that -1 is used as the absent_score in almost all tests here to distinguish it from the range of valid + # scores the function can return ([0., 1.] range, inclusive). + # 2 classes, class 0 is correct everywhere, class 1 is absent. + pytest.param([0], [0], None, -1.0, 2, [1.0, -1.0]), + pytest.param([0, 0], [0, 0], None, -1.0, 2, [1.0, -1.0]), + # absent_score not applied if only class 0 is present and it's the only class. + pytest.param([0], [0], None, -1.0, 1, [1.0]), + # 2 classes, class 1 is correct everywhere, class 0 is absent. + pytest.param([1], [1], None, -1.0, 2, [-1.0, 1.0]), + pytest.param([1, 1], [1, 1], None, -1.0, 2, [-1.0, 1.0]), + # When 0 index ignored, class 0 does not get a score (not even the absent_score). + pytest.param([1], [1], 0, -1.0, 2, [1.0]), + # 3 classes. Only 0 and 2 are present, and are perfectly predicted. 1 should get absent_score. + pytest.param([0, 2], [0, 2], None, -1.0, 3, [1.0, -1.0, 1.0]), + pytest.param([2, 0], [2, 0], None, -1.0, 3, [1.0, -1.0, 1.0]), + # 3 classes. Only 0 and 1 are present, and are perfectly predicted. 2 should get absent_score. 
+ pytest.param([0, 1], [0, 1], None, -1.0, 3, [1.0, 1.0, -1.0]), + pytest.param([1, 0], [1, 0], None, -1.0, 3, [1.0, 1.0, -1.0]), + # 3 classes, class 0 is 0.5 IoU, class 1 is 0 IoU (in pred but not target; should not get absent_score), class + # 2 is absent. + pytest.param([0, 1], [0, 0], None, -1.0, 3, [0.5, 0.0, -1.0]), + # 3 classes, class 0 is 0.5 IoU, class 1 is 0 IoU (in target but not pred; should not get absent_score), class + # 2 is absent. + pytest.param([0, 0], [0, 1], None, -1.0, 3, [0.5, 0.0, -1.0]), + # Sanity checks with absent_score of 1.0. + pytest.param([0, 2], [0, 2], None, 1.0, 3, [1.0, 1.0, 1.0]), + pytest.param([0, 2], [0, 2], 0, 1.0, 3, [1.0, 1.0]), + ], +) +def test_iou_absent_score(pred, target, ignore_index, absent_score, num_classes, expected): + iou_val = iou( + preds=tensor(pred), + target=tensor(target), + ignore_index=ignore_index, + absent_score=absent_score, + num_classes=num_classes, + reduction="none", + ) + assert B.allclose(iou_val, tensor(expected).to(iou_val)) + + +# example data taken from +# https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/metrics/tests/test_ranking.py +@pytest.mark.parametrize( + ["pred", "target", "ignore_index", "num_classes", "reduction", "expected"], + [ + # Ignoring an index outside of [0, num_classes-1] should have no effect. + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], None, 3, "none", [1, 1 / 2, 2 / 3]), + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], -1, 3, "none", [1, 1 / 2, 2 / 3]), + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 255, 3, "none", [1, 1 / 2, 2 / 3]), + # Ignoring a valid index drops only that index from the result. + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 0, 3, "none", [1 / 2, 2 / 3]), + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 1, 3, "none", [1, 2 / 3]), + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 2, 3, "none", [1, 1]), + # When reducing to mean or sum, the ignored index does not contribute to the output. + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 0, 3, "elementwise_mean", [7 / 12]), + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 0, 3, "sum", [7 / 6]), + ], +) +def test_iou_ignore_index(pred, target, ignore_index, num_classes, reduction, expected): + iou_val = iou( + preds=tensor(pred), + target=tensor(target), + ignore_index=ignore_index, + num_classes=num_classes, + reduction=reduction, + ) + assert B.allclose(iou_val, tensor(expected).to(iou_val)) diff --git a/RE/paddlemetric/src/tests/classification/test_kl_divergence.py b/RE/paddlemetric/src/tests/classification/test_kl_divergence.py new file mode 100644 index 00000000..b5137c3a --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_kl_divergence.py @@ -0,0 +1,114 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
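+# scipy's entropy(p, q) serves as the reference below: with a second argument it computes the
+# Kullback-Leibler divergence sum(p * log(p / q)) per row, after normalizing both distributions.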
+from collections import namedtuple +from functools import partial +from typing import Optional + +import numpy as np +import pytest +import paddleext.torchapi as B +from scipy.stats import entropy +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, EXTRA_DIM, NUM_BATCHES, MetricTester +from paddlemetrics.classification import KLDivergence +from paddlemetrics.functional import kl_divergence + +seed_all(42) + +Input = namedtuple("Input", ["p", "q"]) + +_probs_inputs = Input( + p=B.rand(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM), + q=B.rand(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM), +) + +_log_probs_inputs = Input( + p=B.rand(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM).softmax(dim=-1).log(), + q=B.rand(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM).softmax(dim=-1).log(), +) + + +def _sk_metric(p: Tensor, q: Tensor, log_prob: bool, reduction: Optional[str] = "mean"): + if log_prob: + p = p.softmax(dim=-1) + q = q.softmax(dim=-1) + res = entropy(p, q, axis=1) + if reduction == "mean": + return np.mean(res) + if reduction == "sum": + return np.sum(res) + return res + + +@pytest.mark.parametrize("reduction", ["mean", "sum"]) +@pytest.mark.parametrize( + "p, q, log_prob", [(_probs_inputs.p, _probs_inputs.q, False), (_log_probs_inputs.p, _log_probs_inputs.q, True)] +) +class TestKLDivergence(MetricTester): + atol = 1e-6 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_kldivergence(self, reduction, p, q, log_prob, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + p, + q, + KLDivergence, + partial(_sk_metric, log_prob=log_prob, reduction=reduction), + dist_sync_on_step, + metric_args=dict(log_prob=log_prob, reduction=reduction), + ) + + def test_kldivergence_functional(self, reduction, p, q, log_prob): + # todo: `num_outputs` is unused + self.run_functional_metric_test( + p, + q, + kl_divergence, + partial(_sk_metric, log_prob=log_prob, reduction=reduction), + metric_args=dict(log_prob=log_prob, reduction=reduction), + ) + + def test_kldivergence_differentiability(self, reduction, p, q, log_prob): + self.run_differentiability_test( + p, + q, + metric_module=KLDivergence, + metric_functional=kl_divergence, + metric_args=dict(log_prob=log_prob, reduction=reduction), + ) + + # KLDivergence half + cpu does not work due to missing support in B.clamp + @pytest.mark.xfail(reason="KLDivergence metric does not support cpu + half precision") + def test_kldivergence_half_cpu(self, reduction, p, q, log_prob): + self.run_precision_test_cpu(p, q, KLDivergence, kl_divergence, {"log_prob": log_prob, "reduction": reduction}) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_r2_half_gpu(self, reduction, p, q, log_prob): + self.run_precision_test_gpu(p, q, KLDivergence, kl_divergence, {"log_prob": log_prob, "reduction": reduction}) + + +def test_error_on_different_shape(): + metric = KLDivergence() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) + + +def test_error_on_multidim_tensors(): + metric = KLDivergence() + with pytest.raises(ValueError, match="Expected both p and q distribution to be 2D but got 3 and 3 respectively"): + metric(B.randn(10, 20, 5), B.randn(10, 20, 5)) diff --git a/RE/paddlemetric/src/tests/classification/test_matthews_corrcoef.py b/RE/paddlemetric/src/tests/classification/test_matthews_corrcoef.py new file mode 100644 index 
00000000..ce1a5a90 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_matthews_corrcoef.py @@ -0,0 +1,142 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import matthews_corrcoef as sk_matthews_corrcoef + +from tests.classification.inputs import _input_binary, _input_binary_prob +from tests.classification.inputs import _input_multiclass as _input_mcls +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mlb +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, THRESHOLD, MetricTester +from paddlemetrics.classification.matthews_corrcoef import MatthewsCorrcoef +from paddlemetrics.functional.classification.matthews_corrcoef import matthews_corrcoef + +seed_all(42) + + +def _sk_matthews_corrcoef_binary_prob(preds, target): + sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) + sk_target = target.view(-1).numpy() + + return sk_matthews_corrcoef(y_true=sk_target, y_pred=sk_preds) + + +def _sk_matthews_corrcoef_binary(preds, target): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_matthews_corrcoef(y_true=sk_target, y_pred=sk_preds) + + +def _sk_matthews_corrcoef_multilabel_prob(preds, target): + sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) + sk_target = target.view(-1).numpy() + + return sk_matthews_corrcoef(y_true=sk_target, y_pred=sk_preds) + + +def _sk_matthews_corrcoef_multilabel(preds, target): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_matthews_corrcoef(y_true=sk_target, y_pred=sk_preds) + + +def _sk_matthews_corrcoef_multiclass_prob(preds, target): + sk_preds = B.argmax(preds, dim=len(preds.shape) - 1).view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_matthews_corrcoef(y_true=sk_target, y_pred=sk_preds) + + +def _sk_matthews_corrcoef_multiclass(preds, target): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_matthews_corrcoef(y_true=sk_target, y_pred=sk_preds) + + +def _sk_matthews_corrcoef_multidim_multiclass_prob(preds, target): + sk_preds = B.argmax(preds, dim=len(preds.shape) - 2).view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_matthews_corrcoef(y_true=sk_target, y_pred=sk_preds) + + +def _sk_matthews_corrcoef_multidim_multiclass(preds, target): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_matthews_corrcoef(y_true=sk_target, y_pred=sk_preds) + + +@pytest.mark.parametrize( + "preds, target, sk_metric, num_classes", + [ + 
(_input_binary_prob.preds, _input_binary_prob.target, _sk_matthews_corrcoef_binary_prob, 2), + (_input_binary.preds, _input_binary.target, _sk_matthews_corrcoef_binary, 2), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_matthews_corrcoef_multilabel_prob, 2), + (_input_mlb.preds, _input_mlb.target, _sk_matthews_corrcoef_multilabel, 2), + (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_matthews_corrcoef_multiclass_prob, NUM_CLASSES), + (_input_mcls.preds, _input_mcls.target, _sk_matthews_corrcoef_multiclass, NUM_CLASSES), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_matthews_corrcoef_multidim_multiclass_prob, NUM_CLASSES), + (_input_mdmc.preds, _input_mdmc.target, _sk_matthews_corrcoef_multidim_multiclass, NUM_CLASSES), + ], +) +class TestMatthewsCorrCoef(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_matthews_corrcoef(self, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=MatthewsCorrcoef, + sk_metric=sk_metric, + dist_sync_on_step=dist_sync_on_step, + metric_args={ + "num_classes": num_classes, + "threshold": THRESHOLD, + }, + ) + + def test_matthews_corrcoef_functional(self, preds, target, sk_metric, num_classes): + self.run_functional_metric_test( + preds, + target, + metric_functional=matthews_corrcoef, + sk_metric=sk_metric, + metric_args={ + "num_classes": num_classes, + "threshold": THRESHOLD, + }, + ) + + def test_matthews_corrcoef_differentiability(self, preds, target, sk_metric, num_classes): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=MatthewsCorrcoef, + metric_functional=matthews_corrcoef, + metric_args={ + "num_classes": num_classes, + "threshold": THRESHOLD, + }, + ) diff --git a/RE/paddlemetric/src/tests/classification/test_precision_recall.py b/RE/paddlemetric/src/tests/classification/test_precision_recall.py new file mode 100644 index 00000000..981b44ab --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_precision_recall.py @@ -0,0 +1,461 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
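+# Reference values come from sklearn's precision_score / recall_score; preds and targets are first routed
+# through _input_format_classification so both implementations score the same label arrays.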
+from functools import partial +from typing import Callable, Optional + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import precision_score, recall_score +from paddleext.torchapi import Tensor, tensor + +from tests.classification.inputs import _input_binary, _input_binary_logits, _input_binary_prob +from tests.classification.inputs import _input_multiclass as _input_mcls +from tests.classification.inputs import _input_multiclass_logits as _input_mcls_logits +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multiclass_with_missing_class as _input_miss_class +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mlb +from tests.classification.inputs import _input_multilabel_logits as _input_mlb_logits +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_BATCHES, NUM_CLASSES, THRESHOLD, MetricTester +from paddlemetrics import Metric, Precision, Recall +from paddlemetrics.functional import precision, precision_recall, recall +from paddlemetrics.utilities.checks import _input_format_classification +from paddlemetrics.utilities.enums import AverageMethod + +seed_all(42) + + +def _sk_prec_recall(preds, target, sk_fn, num_classes, average, multiclass, ignore_index, mdmc_average=None): + # todo: `mdmc_average` is unused + if average == "none": + average = None + if num_classes == 1: + average = "binary" + + labels = list(range(num_classes)) + try: + labels.remove(ignore_index) + except ValueError: + pass + + sk_preds, sk_target, _ = _input_format_classification( + preds, target, THRESHOLD, num_classes=num_classes, multiclass=multiclass + ) + sk_preds, sk_target = sk_preds.numpy(), sk_target.numpy() + + sk_scores = sk_fn(sk_target, sk_preds, average=average, zero_division=0, labels=labels) + + if len(labels) != num_classes and not average: + sk_scores = np.insert(sk_scores, ignore_index, np.nan) + + return sk_scores + + +def _sk_prec_recall_multidim_multiclass( + preds, target, sk_fn, num_classes, average, multiclass, ignore_index, mdmc_average +): + preds, target, _ = _input_format_classification( + preds, target, threshold=THRESHOLD, num_classes=num_classes, multiclass=multiclass + ) + + if mdmc_average == "global": + preds = B.transpose(preds, 1, 2).reshape(-1, preds.shape[1]) + target = B.transpose(target, 1, 2).reshape(-1, target.shape[1]) + + return _sk_prec_recall(preds, target, sk_fn, num_classes, average, False, ignore_index) + if mdmc_average == "samplewise": + scores = [] + + for i in range(preds.shape[0]): + pred_i = preds[i, ...].T + target_i = target[i, ...].T + scores_i = _sk_prec_recall(pred_i, target_i, sk_fn, num_classes, average, False, ignore_index) + + scores.append(np.expand_dims(scores_i, 0)) + + return np.concatenate(scores).mean(axis=0) + + +@pytest.mark.parametrize("metric, fn_metric", [(Precision, precision), (Recall, recall)]) +@pytest.mark.parametrize( + "average, mdmc_average, num_classes, ignore_index, match_str", + [ + ("wrong", None, None, None, "`average`"), + ("micro", "wrong", None, None, "`mdmc"), + ("macro", None, None, None, "number of classes"), + ("macro", None, 1, 0, "ignore_index"), + ], +) +def test_wrong_params(metric, fn_metric, average, 
mdmc_average, num_classes, ignore_index, match_str): + with pytest.raises(ValueError, match=match_str): + metric( + average=average, + mdmc_average=mdmc_average, + num_classes=num_classes, + ignore_index=ignore_index, + ) + + with pytest.raises(ValueError, match=match_str): + fn_metric( + _input_binary.preds[0], + _input_binary.target[0], + average=average, + mdmc_average=mdmc_average, + num_classes=num_classes, + ignore_index=ignore_index, + ) + + with pytest.raises(ValueError, match=match_str): + precision_recall( + _input_binary.preds[0], + _input_binary.target[0], + average=average, + mdmc_average=mdmc_average, + num_classes=num_classes, + ignore_index=ignore_index, + ) + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Recall, recall), (Precision, precision)]) +def test_zero_division(metric_class, metric_fn): + """Test that zero_division works correctly (currently should just set to 0).""" + + preds = tensor([0, 2, 1, 1]) + target = tensor([2, 1, 2, 1]) + + cl_metric = metric_class(average="none", num_classes=3) + cl_metric(preds, target) + + result_cl = cl_metric.compute() + result_fn = metric_fn(preds, target, average="none", num_classes=3) + + assert result_cl[0] == result_fn[0] == 0 + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Recall, recall), (Precision, precision)]) +def test_no_support(metric_class, metric_fn): + """This tests a rare edge case, where there is only one class present. + + in target, and ignore_index is set to exactly that class - and the + average method is equal to 'weighted'. + + This would mean that the sum of weights equals zero, and would, without + taking care of this case, return NaN. However, the reduction function + should catch that and set the metric to equal the value of zero_division + in this case (zero_division is for now not configurable and equals 0). 
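+
+    Concretely: the target below contains only class 0 and ignore_index=0, so no class is left to weight,
+    and the expected result is therefore 0.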
+ """ + + preds = tensor([1, 1, 0, 0]) + target = tensor([0, 0, 0, 0]) + + cl_metric = metric_class(average="weighted", num_classes=2, ignore_index=0) + cl_metric(preds, target) + + result_cl = cl_metric.compute() + result_fn = metric_fn(preds, target, average="weighted", num_classes=2, ignore_index=0) + + assert result_cl == result_fn == 0 + + +@pytest.mark.parametrize( + "metric_class, metric_fn, sk_fn", [(Recall, recall, recall_score), (Precision, precision, precision_score)] +) +@pytest.mark.parametrize("average", ["micro", "macro", None, "weighted", "samples"]) +@pytest.mark.parametrize("ignore_index", [None, 0]) +@pytest.mark.parametrize( + "preds, target, num_classes, multiclass, mdmc_average, sk_wrapper", + [ + (_input_binary_logits.preds, _input_binary_logits.target, 1, None, None, _sk_prec_recall), + (_input_binary_prob.preds, _input_binary_prob.target, 1, None, None, _sk_prec_recall), + (_input_binary.preds, _input_binary.target, 1, False, None, _sk_prec_recall), + (_input_mlb_logits.preds, _input_mlb_logits.target, NUM_CLASSES, None, None, _sk_prec_recall), + (_input_mlb_prob.preds, _input_mlb_prob.target, NUM_CLASSES, None, None, _sk_prec_recall), + (_input_mlb.preds, _input_mlb.target, NUM_CLASSES, False, None, _sk_prec_recall), + (_input_mcls_logits.preds, _input_mcls_logits.target, NUM_CLASSES, None, None, _sk_prec_recall), + (_input_mcls_prob.preds, _input_mcls_prob.target, NUM_CLASSES, None, None, _sk_prec_recall), + (_input_mcls.preds, _input_mcls.target, NUM_CLASSES, None, None, _sk_prec_recall), + (_input_mdmc.preds, _input_mdmc.target, NUM_CLASSES, None, "global", _sk_prec_recall_multidim_multiclass), + ( + _input_mdmc_prob.preds, + _input_mdmc_prob.target, + NUM_CLASSES, + None, + "global", + _sk_prec_recall_multidim_multiclass, + ), + (_input_mdmc.preds, _input_mdmc.target, NUM_CLASSES, None, "samplewise", _sk_prec_recall_multidim_multiclass), + ( + _input_mdmc_prob.preds, + _input_mdmc_prob.target, + NUM_CLASSES, + None, + "samplewise", + _sk_prec_recall_multidim_multiclass, + ), + ], +) +class TestPrecisionRecall(MetricTester): + @pytest.mark.parametrize("ddp", [False]) + @pytest.mark.parametrize("dist_sync_on_step", [False]) + def test_precision_recall_class( + self, + ddp: bool, + dist_sync_on_step: bool, + preds: Tensor, + target: Tensor, + sk_wrapper: Callable, + metric_class: Metric, + metric_fn: Callable, + sk_fn: Callable, + multiclass: Optional[bool], + num_classes: Optional[int], + average: str, + mdmc_average: Optional[str], + ignore_index: Optional[int], + ): + # todo: `metric_fn` is unused + if num_classes == 1 and average != "micro": + pytest.skip("Only test binary data for 'micro' avg (equivalent of 'binary' in sklearn)") + + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + if average == "weighted" and ignore_index is not None and mdmc_average is not None: + pytest.skip("Ignore special case where we are ignoring entire sample for 'weighted' average") + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=metric_class, + sk_metric=partial( + sk_wrapper, + sk_fn=sk_fn, + average=average, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + mdmc_average=mdmc_average, + ), + dist_sync_on_step=dist_sync_on_step, + metric_args={ + "num_classes": num_classes, + "average": average, + "threshold": THRESHOLD, + "multiclass": multiclass, + "ignore_index": ignore_index, + "mdmc_average": mdmc_average, + }, + 
check_dist_sync_on_step=True, + check_batch=True, + ) + + def test_precision_recall_fn( + self, + preds: Tensor, + target: Tensor, + sk_wrapper: Callable, + metric_class: Metric, + metric_fn: Callable, + sk_fn: Callable, + multiclass: Optional[bool], + num_classes: Optional[int], + average: str, + mdmc_average: Optional[str], + ignore_index: Optional[int], + ): + # todo: `metric_class` is unused + if num_classes == 1 and average != "micro": + pytest.skip("Only test binary data for 'micro' avg (equivalent of 'binary' in sklearn)") + + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + if average == "weighted" and ignore_index is not None and mdmc_average is not None: + pytest.skip("Ignore special case where we are ignoring entire sample for 'weighted' average") + + self.run_functional_metric_test( + preds, + target, + metric_functional=metric_fn, + sk_metric=partial( + sk_wrapper, + sk_fn=sk_fn, + average=average, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + mdmc_average=mdmc_average, + ), + metric_args={ + "num_classes": num_classes, + "average": average, + "threshold": THRESHOLD, + "multiclass": multiclass, + "ignore_index": ignore_index, + "mdmc_average": mdmc_average, + }, + ) + + def test_precision_recall_differentiability( + self, + preds: Tensor, + target: Tensor, + sk_wrapper: Callable, + metric_class: Metric, + metric_fn: Callable, + sk_fn: Callable, + multiclass: Optional[bool], + num_classes: Optional[int], + average: str, + mdmc_average: Optional[str], + ignore_index: Optional[int], + ): + # todo: `metric_class` is unused + if num_classes == 1 and average != "micro": + pytest.skip("Only test binary data for 'micro' avg (equivalent of 'binary' in sklearn)") + + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + if average == "weighted" and ignore_index is not None and mdmc_average is not None: + pytest.skip("Ignore special case where we are ignoring entire sample for 'weighted' average") + + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=metric_class, + metric_functional=metric_fn, + metric_args={ + "num_classes": num_classes, + "average": average, + "threshold": THRESHOLD, + "multiclass": multiclass, + "ignore_index": ignore_index, + "mdmc_average": mdmc_average, + }, + ) + + +@pytest.mark.parametrize("average", ["micro", "macro", None, "weighted", "samples"]) +def test_precision_recall_joint(average): + """A simple test of the joint precision_recall metric. + + No need to test this thorougly, as it is just a combination of precision and recall, which are already tested + thoroughly. 
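+
+    The joint result is only checked to match separate precision() and recall() calls on the same inputs.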
+ """ + + precision_result = precision( + _input_mcls_prob.preds[0], _input_mcls_prob.target[0], average=average, num_classes=NUM_CLASSES + ) + recall_result = recall( + _input_mcls_prob.preds[0], _input_mcls_prob.target[0], average=average, num_classes=NUM_CLASSES + ) + + prec_recall_result = precision_recall( + _input_mcls_prob.preds[0], _input_mcls_prob.target[0], average=average, num_classes=NUM_CLASSES + ) + + assert B.allclose(precision_result, prec_recall_result[0]) + assert B.allclose(recall_result, prec_recall_result[1]) + + +_mc_k_target = tensor([0, 1, 2]) +_mc_k_preds = tensor([[0.35, 0.4, 0.25], [0.1, 0.5, 0.4], [0.2, 0.1, 0.7]]) +_ml_k_target = tensor([[0, 1, 0], [1, 1, 0], [0, 0, 0]]) +_ml_k_preds = tensor([[0.9, 0.2, 0.75], [0.1, 0.7, 0.8], [0.6, 0.1, 0.7]]) + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Recall, recall), (Precision, precision)]) +@pytest.mark.parametrize( + "k, preds, target, average, expected_prec, expected_recall", + [ + (1, _mc_k_preds, _mc_k_target, "micro", tensor(2 / 3), tensor(2 / 3)), + (2, _mc_k_preds, _mc_k_target, "micro", tensor(1 / 2), tensor(1.0)), + (1, _ml_k_preds, _ml_k_target, "micro", tensor(0.0), tensor(0.0)), + (2, _ml_k_preds, _ml_k_target, "micro", tensor(1 / 6), tensor(1 / 3)), + ], +) +def test_top_k( + metric_class, + metric_fn, + k: int, + preds: Tensor, + target: Tensor, + average: str, + expected_prec: Tensor, + expected_recall: Tensor, +): + """A simple test to check that top_k works as expected. + + Just a sanity check, the tests in StatScores should already guarantee the correctness of results. + """ + + class_metric = metric_class(top_k=k, average=average, num_classes=3) + class_metric.update(preds, target) + + if metric_class.__name__ == "Precision": + result = expected_prec + else: + result = expected_recall + + assert B.equal(class_metric.compute(), result) + assert B.equal(metric_fn(preds, target, top_k=k, average=average, num_classes=3), result) + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Precision, precision), (Recall, recall)]) +@pytest.mark.parametrize( + "ignore_index, expected", [(None, B.tensor([1.0, np.nan])), (0, B.tensor([np.nan, np.nan]))] +) +def test_class_not_present(metric_class, metric_fn, ignore_index, expected): + """This tests that when metric is computed per class and a given class is not present in both the `preds` and + `target`, the resulting score is `nan`.""" + preds = B.tensor([0, 0, 0]) + target = B.tensor([0, 0, 0]) + num_classes = 2 + + # test functional + result_fn = metric_fn(preds, target, average=AverageMethod.NONE, num_classes=num_classes, ignore_index=ignore_index) + assert B.allclose(expected, result_fn, equal_nan=True) + + # test class + cl_metric = metric_class(average=AverageMethod.NONE, num_classes=num_classes, ignore_index=ignore_index) + cl_metric(preds, target) + result_cl = cl_metric.compute() + assert B.allclose(expected, result_cl, equal_nan=True) + + +@pytest.mark.parametrize("average", ["micro", "macro", "weighted"]) +@pytest.mark.parametrize( + "metric_class, metric_functional, sk_fn", [(Precision, precision, precision_score), (Recall, recall, recall_score)] +) +def test_same_input(metric_class, metric_functional, sk_fn, average): + preds = _input_miss_class.preds + target = _input_miss_class.target + preds_flat = B.cat(list(preds), dim=0) + target_flat = B.cat(list(target), dim=0) + + mc = metric_class(num_classes=NUM_CLASSES, average=average) + for i in range(NUM_BATCHES): + mc.update(preds[i], target[i]) + class_res = mc.compute() + func_res = 
metric_functional(preds_flat, target_flat, num_classes=NUM_CLASSES, average=average) + sk_res = sk_fn(target_flat, preds_flat, average=average, zero_division=1) + + assert B.allclose(class_res, B.tensor(sk_res).float()) + assert B.allclose(func_res, B.tensor(sk_res).float()) diff --git a/RE/paddlemetric/src/tests/classification/test_precision_recall_curve.py b/RE/paddlemetric/src/tests/classification/test_precision_recall_curve.py new file mode 100644 index 00000000..acd555ca --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_precision_recall_curve.py @@ -0,0 +1,121 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import precision_recall_curve as sk_precision_recall_curve +from paddleext.torchapi import tensor + +from tests.classification.inputs import _input_binary_prob +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, MetricTester +from paddlemetrics.classification.precision_recall_curve import PrecisionRecallCurve +from paddlemetrics.functional import precision_recall_curve + +seed_all(42) + + +def _sk_precision_recall_curve(y_true, probas_pred, num_classes=1): + """Adjusted comparison function that can also handles multiclass.""" + if num_classes == 1: + return sk_precision_recall_curve(y_true, probas_pred) + + precision, recall, thresholds = [], [], [] + for i in range(num_classes): + y_true_temp = np.zeros_like(y_true) + y_true_temp[y_true == i] = 1 + res = sk_precision_recall_curve(y_true_temp, probas_pred[:, i]) + precision.append(res[0]) + recall.append(res[1]) + thresholds.append(res[2]) + return precision, recall, thresholds + + +def _sk_prec_rc_binary_prob(preds, target, num_classes=1): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return _sk_precision_recall_curve(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes) + + +def _sk_prec_rc_multiclass_prob(preds, target, num_classes=1): + sk_preds = preds.reshape(-1, num_classes).numpy() + sk_target = target.view(-1).numpy() + + return _sk_precision_recall_curve(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes) + + +def _sk_prec_rc_multidim_multiclass_prob(preds, target, num_classes=1): + sk_preds = preds.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1).numpy() + sk_target = target.view(-1).numpy() + return _sk_precision_recall_curve(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes) + + +@pytest.mark.parametrize( + "preds, target, sk_metric, num_classes", + [ + (_input_binary_prob.preds, _input_binary_prob.target, _sk_prec_rc_binary_prob, 1), + (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_prec_rc_multiclass_prob, NUM_CLASSES), + (_input_mdmc_prob.preds, 
_input_mdmc_prob.target, _sk_prec_rc_multidim_multiclass_prob, NUM_CLASSES), + ], +) +class TestPrecisionRecallCurve(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_precision_recall_curve(self, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=PrecisionRecallCurve, + sk_metric=partial(sk_metric, num_classes=num_classes), + dist_sync_on_step=dist_sync_on_step, + metric_args={"num_classes": num_classes}, + ) + + def test_precision_recall_curve_functional(self, preds, target, sk_metric, num_classes): + self.run_functional_metric_test( + preds, + target, + metric_functional=precision_recall_curve, + sk_metric=partial(sk_metric, num_classes=num_classes), + metric_args={"num_classes": num_classes}, + ) + + def test_precision_recall_curve_differentiability(self, preds, target, sk_metric, num_classes): + self.run_differentiability_test( + preds, + target, + metric_module=PrecisionRecallCurve, + metric_functional=precision_recall_curve, + metric_args={"num_classes": num_classes}, + ) + + +@pytest.mark.parametrize( + ["pred", "target", "expected_p", "expected_r", "expected_t"], + [pytest.param([1, 2, 3, 4], [1, 0, 0, 1], [0.5, 1 / 3, 0.5, 1.0, 1.0], [1, 0.5, 0.5, 0.5, 0.0], [1, 2, 3, 4])], +) +def test_pr_curve(pred, target, expected_p, expected_r, expected_t): + p, r, t = precision_recall_curve(tensor(pred), tensor(target)) + assert p.size() == r.size() + assert p.size(0) == t.size(0) + 1 + + assert B.allclose(p, tensor(expected_p).to(p)) + assert B.allclose(r, tensor(expected_r).to(r)) + assert B.allclose(t, tensor(expected_t).to(t)) diff --git a/RE/paddlemetric/src/tests/classification/test_roc.py b/RE/paddlemetric/src/tests/classification/test_roc.py new file mode 100644 index 00000000..efe45335 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_roc.py @@ -0,0 +1,146 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
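+# Reading guide for the toy cases in test_roc_curve at the bottom of this file:
+# for pred=[0, 1], target=[0, 1], sweeping the decision threshold from high to low
+# first admits the sample scored 1 (the true positive), so tpr rises 0 -> 1 while
+# fpr stays 0, and only then admits the sample scored 0, pushing fpr to 1; hence
+# the expected tpr=[0, 1, 1] and fpr=[0, 0, 1].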
+from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import roc_curve as sk_roc_curve +from paddleext.torchapi import tensor + +from tests.classification.inputs import _input_binary_prob +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel_multidim_prob as _input_mlmd_prob +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, MetricTester +from paddlemetrics.classification.roc import ROC +from paddlemetrics.functional import roc + +seed_all(42) + + +def _sk_roc_curve(y_true, probas_pred, num_classes: int = 1, multilabel: bool = False): + """Adjusted comparison function that can also handles multiclass.""" + if num_classes == 1: + return sk_roc_curve(y_true, probas_pred, drop_intermediate=False) + + fpr, tpr, thresholds = [], [], [] + for i in range(num_classes): + if multilabel: + y_true_temp = y_true[:, i] + else: + y_true_temp = np.zeros_like(y_true) + y_true_temp[y_true == i] = 1 + + res = sk_roc_curve(y_true_temp, probas_pred[:, i], drop_intermediate=False) + fpr.append(res[0]) + tpr.append(res[1]) + thresholds.append(res[2]) + return fpr, tpr, thresholds + + +def _sk_roc_binary_prob(preds, target, num_classes=1): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return _sk_roc_curve(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes) + + +def _sk_roc_multiclass_prob(preds, target, num_classes=1): + sk_preds = preds.reshape(-1, num_classes).numpy() + sk_target = target.view(-1).numpy() + + return _sk_roc_curve(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes) + + +def _sk_roc_multidim_multiclass_prob(preds, target, num_classes=1): + sk_preds = preds.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1).numpy() + sk_target = target.view(-1).numpy() + return _sk_roc_curve(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes) + + +def _sk_roc_multilabel_prob(preds, target, num_classes=1): + sk_preds = preds.numpy() + sk_target = target.numpy() + return _sk_roc_curve(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes, multilabel=True) + + +def _sk_roc_multilabel_multidim_prob(preds, target, num_classes=1): + sk_preds = preds.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1).numpy() + sk_target = target.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1).numpy() + return _sk_roc_curve(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes, multilabel=True) + + +@pytest.mark.parametrize( + "preds, target, sk_metric, num_classes", + [ + (_input_binary_prob.preds, _input_binary_prob.target, _sk_roc_binary_prob, 1), + (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_roc_multiclass_prob, NUM_CLASSES), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_roc_multidim_multiclass_prob, NUM_CLASSES), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_roc_multilabel_prob, NUM_CLASSES), + (_input_mlmd_prob.preds, _input_mlmd_prob.target, _sk_roc_multilabel_multidim_prob, NUM_CLASSES), + ], +) +class TestROC(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_roc(self, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step): + 
self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=ROC, + sk_metric=partial(sk_metric, num_classes=num_classes), + dist_sync_on_step=dist_sync_on_step, + metric_args={"num_classes": num_classes}, + ) + + def test_roc_functional(self, preds, target, sk_metric, num_classes): + self.run_functional_metric_test( + preds, + target, + metric_functional=roc, + sk_metric=partial(sk_metric, num_classes=num_classes), + metric_args={"num_classes": num_classes}, + ) + + def test_roc_differentiability(self, preds, target, sk_metric, num_classes): + self.run_differentiability_test( + preds, + target, + metric_module=ROC, + metric_functional=roc, + metric_args={"num_classes": num_classes}, + ) + + +@pytest.mark.parametrize( + ["pred", "target", "expected_tpr", "expected_fpr"], + [ + pytest.param([0, 1], [0, 1], [0, 1, 1], [0, 0, 1]), + pytest.param([1, 0], [0, 1], [0, 0, 1], [0, 1, 1]), + pytest.param([1, 1], [1, 0], [0, 1], [0, 1]), + pytest.param([1, 0], [1, 0], [0, 1, 1], [0, 0, 1]), + pytest.param([0.5, 0.5], [0, 1], [0, 1], [0, 1]), + ], +) +def test_roc_curve(pred, target, expected_tpr, expected_fpr): + fpr, tpr, thresh = roc(tensor(pred), tensor(target)) + + assert fpr.shape == tpr.shape + assert fpr.size(0) == thresh.size(0) + assert B.allclose(fpr, tensor(expected_fpr).to(fpr)) + assert B.allclose(tpr, tensor(expected_tpr).to(tpr)) diff --git a/RE/paddlemetric/src/tests/classification/test_specificity.py b/RE/paddlemetric/src/tests/classification/test_specificity.py new file mode 100644 index 00000000..90611d06 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_specificity.py @@ -0,0 +1,414 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
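+# Specificity is the true-negative rate, tn / (tn + fp). The _sk_spec helper below
+# rebuilds that quantity from sklearn's multilabel confusion matrices so the
+# paddlemetrics class and functional implementations can be compared against an
+# independent reference.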
+import math +from functools import partial +from typing import Callable, Optional + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import multilabel_confusion_matrix +from paddleext.torchapi import Tensor, tensor + +from tests.classification.inputs import _input_binary, _input_binary_prob +from tests.classification.inputs import _input_multiclass as _input_mcls +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mlb +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, THRESHOLD, MetricTester +from paddlemetrics import Metric, Specificity +from paddlemetrics.functional import specificity +from paddlemetrics.functional.classification.stat_scores import _reduce_stat_scores +from paddlemetrics.utilities.checks import _input_format_classification +from paddlemetrics.utilities.enums import AverageMethod + +seed_all(42) + + +def _sk_stats_score(preds, target, reduce, num_classes, multiclass, ignore_index, top_k): + preds, target, _ = _input_format_classification( + preds, target, threshold=THRESHOLD, num_classes=num_classes, multiclass=multiclass, top_k=top_k + ) + sk_preds, sk_target = preds.numpy(), target.numpy() + + if reduce != "macro" and ignore_index is not None and preds.shape[1] > 1: + sk_preds = np.delete(sk_preds, ignore_index, 1) + sk_target = np.delete(sk_target, ignore_index, 1) + + if preds.shape[1] == 1 and reduce == "samples": + sk_target = sk_target.T + sk_preds = sk_preds.T + + sk_stats = multilabel_confusion_matrix( + sk_target, sk_preds, samplewise=(reduce == "samples") and preds.shape[1] != 1 + ) + + if preds.shape[1] == 1 and reduce != "samples": + sk_stats = sk_stats[[1]].reshape(-1, 4)[:, [3, 1, 0, 2]] + else: + sk_stats = sk_stats.reshape(-1, 4)[:, [3, 1, 0, 2]] + + if reduce == "micro": + sk_stats = sk_stats.sum(axis=0, keepdims=True) + + sk_stats = np.concatenate([sk_stats, sk_stats[:, [3]] + sk_stats[:, [0]]], 1) + + if reduce == "micro": + sk_stats = sk_stats[0] + + if reduce == "macro" and ignore_index is not None and preds.shape[1]: + sk_stats[ignore_index, :] = -1 + + if reduce == "micro": + _, fp, tn, _, _ = sk_stats + else: + _, fp, tn, _ = sk_stats[:, 0], sk_stats[:, 1], sk_stats[:, 2], sk_stats[:, 3] + return fp, tn + + +def _sk_spec(preds, target, reduce, num_classes, multiclass, ignore_index, top_k=None, mdmc_reduce=None, stats=None): + + if stats: + fp, tn = stats + else: + stats = _sk_stats_score(preds, target, reduce, num_classes, multiclass, ignore_index, top_k) + fp, tn = stats + + fp, tn = tensor(fp), tensor(tn) + spec = _reduce_stat_scores( + numerator=tn, + denominator=tn + fp, + weights=None if reduce != "weighted" else tn + fp, + average=reduce, + mdmc_average=mdmc_reduce, + ) + if reduce in [None, "none"] and ignore_index is not None and preds.shape[1] > 1: + spec = spec.numpy() + spec = np.insert(spec, ignore_index, math.nan) + spec = tensor(spec) + + return spec + + +def _sk_spec_mdim_mcls(preds, target, reduce, mdmc_reduce, num_classes, multiclass, ignore_index, top_k=None): + preds, target, _ = _input_format_classification( + preds, target, threshold=THRESHOLD, num_classes=num_classes, multiclass=multiclass, 
top_k=top_k + ) + + if mdmc_reduce == "global": + preds = B.transpose(preds, 1, 2).reshape(-1, preds.shape[1]) + target = B.transpose(target, 1, 2).reshape(-1, target.shape[1]) + return _sk_spec(preds, target, reduce, num_classes, False, ignore_index, top_k, mdmc_reduce) + fp, tn = [], [] + stats = [] + + for i in range(preds.shape[0]): + pred_i = preds[i, ...].T + target_i = target[i, ...].T + fp_i, tn_i = _sk_stats_score(pred_i, target_i, reduce, num_classes, False, ignore_index, top_k) + fp.append(fp_i) + tn.append(tn_i) + + stats.append(fp) + stats.append(tn) + return _sk_spec(preds[0], target[0], reduce, num_classes, multiclass, ignore_index, top_k, mdmc_reduce, stats) + + +@pytest.mark.parametrize("metric, fn_metric", [(Specificity, specificity)]) +@pytest.mark.parametrize( + "average, mdmc_average, num_classes, ignore_index, match_str", + [ + ("wrong", None, None, None, "`average`"), + ("micro", "wrong", None, None, "`mdmc"), + ("macro", None, None, None, "number of classes"), + ("macro", None, 1, 0, "ignore_index"), + ], +) +def test_wrong_params(metric, fn_metric, average, mdmc_average, num_classes, ignore_index, match_str): + with pytest.raises(ValueError, match=match_str): + metric( + average=average, + mdmc_average=mdmc_average, + num_classes=num_classes, + ignore_index=ignore_index, + ) + + with pytest.raises(ValueError, match=match_str): + fn_metric( + _input_binary.preds[0], + _input_binary.target[0], + average=average, + mdmc_average=mdmc_average, + num_classes=num_classes, + ignore_index=ignore_index, + ) + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Specificity, specificity)]) +def test_zero_division(metric_class, metric_fn): + """Test that zero_division works correctly (currently should just set to 0).""" + + preds = tensor([1, 2, 1, 1]) + target = tensor([0, 0, 0, 0]) + + cl_metric = metric_class(average="none", num_classes=3) + cl_metric(preds, target) + + result_cl = cl_metric.compute() + result_fn = metric_fn(preds, target, average="none", num_classes=3) + + assert result_cl[0] == result_fn[0] == 0 + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Specificity, specificity)]) +def test_no_support(metric_class, metric_fn): + """This tests a rare edge case, where there is only one class present. + + in target, and ignore_index is set to exactly that class - and the + average method is equal to 'weighted'. + + This would mean that the sum of weights equals zero, and would, without + taking care of this case, return NaN. However, the reduction function + should catch that and set the metric to equal the value of zero_division + in this case (zero_division is for now not configurable and equals 0). 
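+
+    In other words, every class that could contribute a non-zero weight is excluded
+    via ignore_index, so the sum of weights is zero and the reduction falls back to
+    the zero_division value of 0.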
+ """ + + preds = tensor([1, 1, 0, 0]) + target = tensor([0, 0, 0, 0]) + + cl_metric = metric_class(average="weighted", num_classes=2, ignore_index=1) + cl_metric(preds, target) + + result_cl = cl_metric.compute() + result_fn = metric_fn(preds, target, average="weighted", num_classes=2, ignore_index=1) + + assert result_cl == result_fn == 0 + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Specificity, specificity)]) +@pytest.mark.parametrize("average", ["micro", "macro", None, "weighted", "samples"]) +@pytest.mark.parametrize("ignore_index", [None, 0]) +@pytest.mark.parametrize( + "preds, target, num_classes, multiclass, mdmc_average, sk_wrapper", + [ + (_input_binary_prob.preds, _input_binary_prob.target, 1, None, None, _sk_spec), + (_input_binary.preds, _input_binary.target, 1, False, None, _sk_spec), + (_input_mlb_prob.preds, _input_mlb_prob.target, NUM_CLASSES, None, None, _sk_spec), + (_input_mlb.preds, _input_mlb.target, NUM_CLASSES, False, None, _sk_spec), + (_input_mcls_prob.preds, _input_mcls_prob.target, NUM_CLASSES, None, None, _sk_spec), + (_input_mcls.preds, _input_mcls.target, NUM_CLASSES, None, None, _sk_spec), + (_input_mdmc.preds, _input_mdmc.target, NUM_CLASSES, None, "global", _sk_spec_mdim_mcls), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, NUM_CLASSES, None, "global", _sk_spec_mdim_mcls), + (_input_mdmc.preds, _input_mdmc.target, NUM_CLASSES, None, "samplewise", _sk_spec_mdim_mcls), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, NUM_CLASSES, None, "samplewise", _sk_spec_mdim_mcls), + ], +) +class TestSpecificity(MetricTester): + @pytest.mark.parametrize("ddp", [False, True]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_specificity_class( + self, + ddp: bool, + dist_sync_on_step: bool, + preds: Tensor, + target: Tensor, + sk_wrapper: Callable, + metric_class: Metric, + metric_fn: Callable, + multiclass: Optional[bool], + num_classes: Optional[int], + average: str, + mdmc_average: Optional[str], + ignore_index: Optional[int], + ): + # todo: `metric_fn` is unused + if num_classes == 1 and average != "micro": + pytest.skip("Only test binary data for 'micro' avg (equivalent of 'binary' in sklearn)") + + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + if average == "weighted" and ignore_index is not None and mdmc_average is not None: + pytest.skip("Ignore special case where we are ignoring entire sample for 'weighted' average") + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=metric_class, + sk_metric=partial( + sk_wrapper, + reduce=average, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + mdmc_reduce=mdmc_average, + ), + dist_sync_on_step=dist_sync_on_step, + metric_args={ + "num_classes": num_classes, + "average": average, + "threshold": THRESHOLD, + "multiclass": multiclass, + "ignore_index": ignore_index, + "mdmc_average": mdmc_average, + }, + check_dist_sync_on_step=True, + check_batch=True, + ) + + def test_specificity_fn( + self, + preds: Tensor, + target: Tensor, + sk_wrapper: Callable, + metric_class: Metric, + metric_fn: Callable, + multiclass: Optional[bool], + num_classes: Optional[int], + average: str, + mdmc_average: Optional[str], + ignore_index: Optional[int], + ): + # todo: `metric_class` is unused + if num_classes == 1 and average != "micro": + pytest.skip("Only test binary data for 'micro' avg (equivalent of 'binary' in sklearn)") + + if ignore_index is not None 
and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + if average == "weighted" and ignore_index is not None and mdmc_average is not None: + pytest.skip("Ignore special case where we are ignoring entire sample for 'weighted' average") + + self.run_functional_metric_test( + preds, + target, + metric_functional=metric_fn, + sk_metric=partial( + sk_wrapper, + reduce=average, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + mdmc_reduce=mdmc_average, + ), + metric_args={ + "num_classes": num_classes, + "average": average, + "threshold": THRESHOLD, + "multiclass": multiclass, + "ignore_index": ignore_index, + "mdmc_average": mdmc_average, + }, + ) + + def test_accuracy_differentiability( + self, + preds: Tensor, + target: Tensor, + sk_wrapper: Callable, + metric_class: Metric, + metric_fn: Callable, + multiclass: Optional[bool], + num_classes: Optional[int], + average: str, + mdmc_average: Optional[str], + ignore_index: Optional[int], + ): + + if num_classes == 1 and average != "micro": + pytest.skip("Only test binary data for 'micro' avg (equivalent of 'binary' in sklearn)") + + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + if average == "weighted" and ignore_index is not None and mdmc_average is not None: + pytest.skip("Ignore special case where we are ignoring entire sample for 'weighted' average") + + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=metric_class, + metric_functional=metric_fn, + metric_args={ + "num_classes": num_classes, + "average": average, + "threshold": THRESHOLD, + "multiclass": multiclass, + "ignore_index": ignore_index, + "mdmc_average": mdmc_average, + }, + ) + + +_mc_k_target = tensor([0, 1, 2]) +_mc_k_preds = tensor([[0.35, 0.4, 0.25], [0.1, 0.5, 0.4], [0.2, 0.1, 0.7]]) +_ml_k_target = tensor([[0, 1, 0], [1, 1, 0], [0, 0, 0]]) +_ml_k_preds = tensor([[0.9, 0.2, 0.75], [0.1, 0.7, 0.8], [0.6, 0.1, 0.7]]) + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Specificity, specificity)]) +@pytest.mark.parametrize( + "k, preds, target, average, expected_spec", + [ + (1, _mc_k_preds, _mc_k_target, "micro", tensor(5 / 6)), + (2, _mc_k_preds, _mc_k_target, "micro", tensor(1 / 2)), + (1, _ml_k_preds, _ml_k_target, "micro", tensor(1 / 2)), + (2, _ml_k_preds, _ml_k_target, "micro", tensor(1 / 6)), + ], +) +def test_top_k( + metric_class, + metric_fn, + k: int, + preds: Tensor, + target: Tensor, + average: str, + expected_spec: Tensor, +): + """A simple test to check that top_k works as expected. + + Just a sanity check, the tests in StatScores should already guarantee the correctness of results. 
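+
+    With top_k=k the k highest-scoring classes of every sample are treated as
+    predicted positives rather than only the argmax, which is why the expected
+    specificity changes between k=1 and k=2.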
+ """ + + class_metric = metric_class(top_k=k, average=average, num_classes=3) + class_metric.update(preds, target) + + assert B.equal(class_metric.compute(), expected_spec) + assert B.equal(metric_fn(preds, target, top_k=k, average=average, num_classes=3), expected_spec) + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Specificity, specificity)]) +@pytest.mark.parametrize( + "ignore_index, expected", [(None, B.tensor([0.0, np.nan])), (0, B.tensor([np.nan, np.nan]))] +) +def test_class_not_present(metric_class, metric_fn, ignore_index, expected): + """This tests that when metric is computed per class and a given class is not present in both the `preds` and + `target`, the resulting score is `nan`.""" + preds = B.tensor([0, 0, 0]) + target = B.tensor([0, 0, 0]) + num_classes = 2 + + # test functional + result_fn = metric_fn(preds, target, average=AverageMethod.NONE, num_classes=num_classes, ignore_index=ignore_index) + assert B.allclose(expected, result_fn, equal_nan=True) + + # test class + cl_metric = metric_class(average=AverageMethod.NONE, num_classes=num_classes, ignore_index=ignore_index) + cl_metric(preds, target) + result_cl = cl_metric.compute() + assert B.allclose(expected, result_cl, equal_nan=True) diff --git a/RE/paddlemetric/src/tests/classification/test_stat_scores.py b/RE/paddlemetric/src/tests/classification/test_stat_scores.py new file mode 100644 index 00000000..c0e2656c --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_stat_scores.py @@ -0,0 +1,323 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from functools import partial +from typing import Callable, Optional + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import multilabel_confusion_matrix +from paddleext.torchapi import Tensor, tensor + +from tests.classification.inputs import _input_binary, _input_binary_logits, _input_binary_prob, _input_multiclass +from tests.classification.inputs import _input_multiclass_logits as _input_mcls_logits +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mcls +from tests.classification.inputs import _input_multilabel_logits as _input_mlb_logits +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, MetricTester +from paddlemetrics import StatScores +from paddlemetrics.functional import stat_scores +from paddlemetrics.utilities.checks import _input_format_classification + +seed_all(42) + + +def _sk_stat_scores(preds, target, reduce, num_classes, multiclass, ignore_index, top_k, threshold, mdmc_reduce=None): + # todo: `mdmc_reduce` is unused + preds, target, _ = _input_format_classification( + preds, target, threshold=threshold, num_classes=num_classes, multiclass=multiclass, top_k=top_k + ) + sk_preds, sk_target = preds.numpy(), target.numpy() + + if reduce != "macro" and ignore_index is not None and preds.shape[1] > 1: + sk_preds = np.delete(sk_preds, ignore_index, 1) + sk_target = np.delete(sk_target, ignore_index, 1) + + if preds.shape[1] == 1 and reduce == "samples": + sk_target = sk_target.T + sk_preds = sk_preds.T + + sk_stats = multilabel_confusion_matrix( + sk_target, sk_preds, samplewise=(reduce == "samples") and preds.shape[1] != 1 + ) + + if preds.shape[1] == 1 and reduce != "samples": + sk_stats = sk_stats[[1]].reshape(-1, 4)[:, [3, 1, 0, 2]] + else: + sk_stats = sk_stats.reshape(-1, 4)[:, [3, 1, 0, 2]] + + if reduce == "micro": + sk_stats = sk_stats.sum(axis=0, keepdims=True) + + sk_stats = np.concatenate([sk_stats, sk_stats[:, [3]] + sk_stats[:, [0]]], 1) + + if reduce == "micro": + sk_stats = sk_stats[0] + + if reduce == "macro" and ignore_index is not None and preds.shape[1]: + sk_stats[ignore_index, :] = -1 + + return sk_stats + + +def _sk_stat_scores_mdim_mcls( + preds, target, reduce, mdmc_reduce, num_classes, multiclass, ignore_index, top_k, threshold +): + preds, target, _ = _input_format_classification( + preds, target, threshold=threshold, num_classes=num_classes, multiclass=multiclass, top_k=top_k + ) + + if mdmc_reduce == "global": + preds = B.transpose(preds, 1, 2).reshape(-1, preds.shape[1]) + target = B.transpose(target, 1, 2).reshape(-1, target.shape[1]) + + return _sk_stat_scores(preds, target, reduce, None, False, ignore_index, top_k, threshold) + if mdmc_reduce == "samplewise": + scores = [] + + for i in range(preds.shape[0]): + pred_i = preds[i, ...].T + target_i = target[i, ...].T + scores_i = _sk_stat_scores(pred_i, target_i, reduce, None, False, ignore_index, top_k, threshold) + + scores.append(np.expand_dims(scores_i, 0)) + + return np.concatenate(scores) + + +@pytest.mark.parametrize( + "reduce, mdmc_reduce, num_classes, inputs, ignore_index", + [ + ["unknown", None, None, _input_binary, None], + ["micro", "unknown", None, 
_input_binary, None], + ["macro", None, None, _input_binary, None], + ["micro", None, None, _input_mdmc_prob, None], + ["micro", None, None, _input_binary_prob, 0], + ["micro", None, None, _input_mcls_prob, NUM_CLASSES], + ["micro", None, NUM_CLASSES, _input_mcls_prob, NUM_CLASSES], + ], +) +def test_wrong_params(reduce, mdmc_reduce, num_classes, inputs, ignore_index): + """Test a combination of parameters that are invalid and should raise an error. + + This includes invalid ``reduce`` and ``mdmc_reduce`` parameter values, not setting ``num_classes`` when + ``reduce='macro'`, not setting ``mdmc_reduce`` when inputs are multi-dim multi-class``, setting ``ignore_index`` + when inputs are binary, as well as setting ``ignore_index`` to a value higher than the number of classes. + """ + with pytest.raises(ValueError): + stat_scores( + inputs.preds[0], inputs.target[0], reduce, mdmc_reduce, num_classes=num_classes, ignore_index=ignore_index + ) + + with pytest.raises(ValueError): + sts = StatScores(reduce=reduce, mdmc_reduce=mdmc_reduce, num_classes=num_classes, ignore_index=ignore_index) + sts(inputs.preds[0], inputs.target[0]) + + +@pytest.mark.parametrize("ignore_index", [None, 0]) +@pytest.mark.parametrize("reduce", ["micro", "macro", "samples"]) +@pytest.mark.parametrize( + "preds, target, sk_fn, mdmc_reduce, num_classes, multiclass, top_k, threshold", + [ + (_input_binary_logits.preds, _input_binary_logits.target, _sk_stat_scores, None, 1, None, None, 0.0), + (_input_binary_prob.preds, _input_binary_prob.target, _sk_stat_scores, None, 1, None, None, 0.5), + (_input_binary.preds, _input_binary.target, _sk_stat_scores, None, 1, False, None, 0.5), + (_input_mlb_logits.preds, _input_mlb_logits.target, _sk_stat_scores, None, NUM_CLASSES, None, None, 0.0), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_stat_scores, None, NUM_CLASSES, None, None, 0.5), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_stat_scores, None, NUM_CLASSES, None, 2, 0.5), + (_input_mcls.preds, _input_mcls.target, _sk_stat_scores, None, NUM_CLASSES, False, None, 0.5), + (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_stat_scores, None, NUM_CLASSES, None, None, 0.5), + (_input_mcls_logits.preds, _input_mcls_logits.target, _sk_stat_scores, None, NUM_CLASSES, None, None, 0.0), + (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_stat_scores, None, NUM_CLASSES, None, 2, 0.0), + (_input_multiclass.preds, _input_multiclass.target, _sk_stat_scores, None, NUM_CLASSES, None, None, 0.0), + (_input_mdmc.preds, _input_mdmc.target, _sk_stat_scores_mdim_mcls, "samplewise", NUM_CLASSES, None, None, 0.0), + ( + _input_mdmc_prob.preds, + _input_mdmc_prob.target, + _sk_stat_scores_mdim_mcls, + "samplewise", + NUM_CLASSES, + None, + None, + 0.0, + ), + (_input_mdmc.preds, _input_mdmc.target, _sk_stat_scores_mdim_mcls, "global", NUM_CLASSES, None, None, 0.0), + ( + _input_mdmc_prob.preds, + _input_mdmc_prob.target, + _sk_stat_scores_mdim_mcls, + "global", + NUM_CLASSES, + None, + None, + 0.0, + ), + ], +) +class TestStatScores(MetricTester): + # DDP tests temporarily disabled due to hanging issues + @pytest.mark.parametrize("ddp", [False]) + @pytest.mark.parametrize("dist_sync_on_step", [False]) #True, + def test_stat_scores_class( + self, + ddp: bool, + dist_sync_on_step: bool, + sk_fn: Callable, + preds: Tensor, + target: Tensor, + reduce: str, + mdmc_reduce: Optional[str], + num_classes: Optional[int], + multiclass: Optional[bool], + ignore_index: Optional[int], + top_k: Optional[int], + threshold: Optional[float], + 
): + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=StatScores, + sk_metric=partial( + sk_fn, + reduce=reduce, + mdmc_reduce=mdmc_reduce, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + top_k=top_k, + threshold=threshold, + ), + dist_sync_on_step=dist_sync_on_step, + metric_args={ + "num_classes": num_classes, + "reduce": reduce, + "mdmc_reduce": mdmc_reduce, + "threshold": threshold, + "multiclass": multiclass, + "ignore_index": ignore_index, + "top_k": top_k, + }, + check_dist_sync_on_step=True, + check_batch=True, + ) + + def test_stat_scores_fn( + self, + sk_fn: Callable, + preds: Tensor, + target: Tensor, + reduce: str, + mdmc_reduce: Optional[str], + num_classes: Optional[int], + multiclass: Optional[bool], + ignore_index: Optional[int], + top_k: Optional[int], + threshold: Optional[float], + ): + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + self.run_functional_metric_test( + preds, + target, + metric_functional=stat_scores, + sk_metric=partial( + sk_fn, + reduce=reduce, + mdmc_reduce=mdmc_reduce, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + top_k=top_k, + threshold=threshold, + ), + metric_args={ + "num_classes": num_classes, + "reduce": reduce, + "mdmc_reduce": mdmc_reduce, + "threshold": threshold, + "multiclass": multiclass, + "ignore_index": ignore_index, + "top_k": top_k, + }, + ) + + def test_stat_scores_differentiability( + self, + sk_fn: Callable, + preds: Tensor, + target: Tensor, + reduce: str, + mdmc_reduce: Optional[str], + num_classes: Optional[int], + multiclass: Optional[bool], + ignore_index: Optional[int], + top_k: Optional[int], + threshold: Optional[float], + ): + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + self.run_differentiability_test( + preds, + target, + metric_module=StatScores, + metric_functional=stat_scores, + metric_args={ + "num_classes": num_classes, + "reduce": reduce, + "mdmc_reduce": mdmc_reduce, + "threshold": threshold, + "multiclass": multiclass, + "ignore_index": ignore_index, + "top_k": top_k, + }, + ) + + +_mc_k_target = tensor([0, 1, 2]) +_mc_k_preds = tensor([[0.35, 0.4, 0.25], [0.1, 0.5, 0.4], [0.2, 0.1, 0.7]]) +_ml_k_target = tensor([[0, 1, 0], [1, 1, 0], [0, 0, 0]]) +_ml_k_preds = tensor([[0.9, 0.2, 0.75], [0.1, 0.7, 0.8], [0.6, 0.1, 0.7]]) + + +@pytest.mark.parametrize( + "k, preds, target, reduce, expected", + [ + (1, _mc_k_preds, _mc_k_target, "micro", tensor([2, 1, 5, 1, 3])), + (2, _mc_k_preds, _mc_k_target, "micro", tensor([3, 3, 3, 0, 3])), + (1, _ml_k_preds, _ml_k_target, "micro", tensor([0, 3, 3, 3, 3])), + (2, _ml_k_preds, _ml_k_target, "micro", tensor([1, 5, 1, 2, 3])), + (1, _mc_k_preds, _mc_k_target, "macro", tensor([[0, 1, 1], [0, 1, 0], [2, 1, 2], [1, 0, 0], [1, 1, 1]])), + (2, _mc_k_preds, _mc_k_target, "macro", tensor([[1, 1, 1], [1, 1, 1], [1, 1, 1], [0, 0, 0], [1, 1, 1]])), + (1, _ml_k_preds, _ml_k_target, "macro", tensor([[0, 0, 0], [1, 0, 2], [1, 1, 1], [1, 2, 0], [1, 2, 0]])), + (2, _ml_k_preds, _ml_k_target, "macro", tensor([[0, 1, 0], [2, 0, 3], [0, 1, 0], [1, 1, 0], [1, 2, 0]])), + ], +) +def test_top_k(k: int, preds: Tensor, target: Tensor, reduce: str, expected: Tensor): + """A simple test to check that top_k works as expected.""" + + 
class_metric = StatScores(top_k=k, reduce=reduce, num_classes=3) + class_metric.update(preds, target) + + assert B.allclose(class_metric.compute(), expected.T) + assert B.allclose(stat_scores(preds, target, top_k=k, reduce=reduce, num_classes=3), expected.T) diff --git a/RE/paddlemetric/src/tests/functional/__init__.py b/RE/paddlemetric/src/tests/functional/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/functional/test_classification.py b/RE/paddlemetric/src/tests/functional/test_classification.py new file mode 100644 index 00000000..d2048387 --- /dev/null +++ b/RE/paddlemetric/src/tests/functional/test_classification.py @@ -0,0 +1,123 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytest +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from tests.helpers import seed_all +from paddlemetrics.functional import dice_score +from paddlemetrics.functional.classification.precision_recall_curve import _binary_clf_curve +from paddlemetrics.utilities.data import get_num_classes, to_categorical, to_onehot + + +def test_onehot(): + test_tensor = tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]) + expected = B.stack( + [ + B.cat([B.eye(5, dtype=int), B.zeros((5, 5), dtype=int)]), + B.cat([B.zeros((5, 5), dtype=int), B.eye(5, dtype=int)]), + ] + ) + + assert tuple(test_tensor.shape) == (2, 5) + assert tuple(expected.shape) == (2, 10, 5) + + onehot_classes = to_onehot(test_tensor, num_classes=10) + onehot_no_classes = to_onehot(test_tensor) + + assert B.allclose(onehot_classes, onehot_no_classes) + + assert onehot_classes.shape == expected.shape + assert onehot_no_classes.shape == expected.shape + + assert B.allclose(expected.to(onehot_no_classes), onehot_no_classes) + assert B.allclose(expected.to(onehot_classes), onehot_classes) + + +def test_to_categorical(): + test_tensor = B.stack( + [ + B.cat([B.eye(5, dtype=int), B.zeros((5, 5), dtype=int)]), + B.cat([B.zeros((5, 5), dtype=int), B.eye(5, dtype=int)]), + ] + ).to(B.float) + + expected = tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]) + assert tuple(expected.shape) == (2, 5) + assert tuple(test_tensor.shape) == (2, 10, 5) + + result = to_categorical(test_tensor) + + assert result.shape == expected.shape + assert B.allclose(result, expected.to(result.dtype)) + + +@pytest.mark.parametrize( + ["preds", "target", "num_classes", "expected_num_classes"], + [ + pytest.param(B.rand(32, 10, 28, 28), B.randint(10, (32, 28, 28)), 10, 10), + pytest.param(B.rand(32, 10, 28, 28), B.randint(10, (32, 28, 28)), None, 10), + pytest.param(B.rand(32, 28, 28), B.randint(10, (32, 28, 28)), None, 10), + ], +) +def test_get_num_classes(preds, target, num_classes, expected_num_classes): + assert get_num_classes(preds, target, num_classes) == expected_num_classes + + +@pytest.mark.parametrize( + ["sample_weight", "pos_label", "exp_shape"], + [ + pytest.param(1, 1.0, 42), + pytest.param(None, 1.0, 42), + ], +) +def test_binary_clf_curve(sample_weight, 
pos_label, exp_shape): + # TODO: move back the pred and target to test func arguments + # if you fix the array inside the function, you'd also have fix the shape, + # because when the array changes, you also have to fix the shape + seed_all(0) + pred = B.randint(low=51, high=99, size=(100,), dtype=B.float) / 100 + target = tensor([0, 1] * 50, dtype=B.int) + if sample_weight is not None: + sample_weight = B.ones_like(pred) * sample_weight + + fps, tps, thresh = _binary_clf_curve(preds=pred, target=target, sample_weights=sample_weight, pos_label=pos_label) + + assert isinstance(tps, Tensor) + assert isinstance(fps, Tensor) + assert isinstance(thresh, Tensor) + if B.platform() == "torch": + assert tuple(tps.shape) == (exp_shape,) + assert tuple(fps.shape) == (exp_shape,) + assert tuple(thresh.shape) == (exp_shape,) + elif B.platform() == "paddle": + assert tuple(tps.shape) == (exp_shape - 1,) + assert tuple(fps.shape) == (exp_shape - 1,) + assert tuple(thresh.shape) == (exp_shape - 1,) + else: + raise Exception(f"unknown platform {B.platform()}") + + +@pytest.mark.parametrize( + ["pred", "target", "expected"], + [ + pytest.param([[0, 0], [1, 1]], [[0, 0], [1, 1]], 1.0), + pytest.param([[1, 1], [0, 0]], [[0, 0], [1, 1]], 0.0), + pytest.param([[1, 1], [1, 1]], [[1, 1], [0, 0]], 2 / 3), + pytest.param([[1, 1], [0, 0]], [[1, 1], [0, 0]], 1.0), + ], +) +def test_dice_score(pred, target, expected): + score = dice_score(tensor(pred), tensor(target)) + assert score == expected diff --git a/RE/paddlemetric/src/tests/functional/test_image_gradients.py b/RE/paddlemetric/src/tests/functional/test_image_gradients.py new file mode 100644 index 00000000..b4bad7f1 --- /dev/null +++ b/RE/paddlemetric/src/tests/functional/test_image_gradients.py @@ -0,0 +1,110 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
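+# image_gradients, as exercised here, returns forward differences: dy[..., i, j] is
+# image[..., i + 1, j] - image[..., i, j] and dx[..., i, j] is
+# image[..., i, j + 1] - image[..., i, j], with the last row of dy and the last
+# column of dx zero-filled. For the arange-based 5x5 test image this yields rows of
+# 5.0 in dy and columns of 1.0 in dx, matching true_dy and true_dx below.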
+import pytest +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional import image_gradients + + +def test_invalid_input_img_type(): + """Test Whether the module successfully handles invalid input data type.""" + invalid_dummy_input = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] + + with pytest.raises(TypeError): + image_gradients(invalid_dummy_input) + + +def test_invalid_input_ndims(): + """Test whether the module successfully handles invalid number of dimensions of input tensor.""" + + BATCH_SIZE = 1 + HEIGHT = 5 + WIDTH = 5 + CHANNELS = 1 + + image = B.arange(0, BATCH_SIZE * HEIGHT * WIDTH * CHANNELS, dtype=B.float32) + image = B.reshape(image, (HEIGHT, WIDTH)) + + with pytest.raises(RuntimeError): + image_gradients(image) + + +def test_multi_batch_image_gradients(): + """Test whether the module correctly calculates gradients for known input with non-unity batch size.Example + input-output pair taken from TF's implementation of i mage-gradients.""" + + BATCH_SIZE = 5 + HEIGHT = 5 + WIDTH = 5 + CHANNELS = 1 + + single_channel_img = B.arange(0, 1 * HEIGHT * WIDTH * CHANNELS, dtype=B.float32) + single_channel_img = B.reshape(single_channel_img, (CHANNELS, HEIGHT, WIDTH)) + image = B.stack([single_channel_img for _ in range(BATCH_SIZE)], dim=0) + + true_dy = [ + [5.0, 5.0, 5.0, 5.0, 5.0], + [5.0, 5.0, 5.0, 5.0, 5.0], + [5.0, 5.0, 5.0, 5.0, 5.0], + [5.0, 5.0, 5.0, 5.0, 5.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + ] + true_dy = B.to_tensor(true_dy) + + dy, dx = image_gradients(image) + + for batch_id in range(BATCH_SIZE): + assert B.allclose(dy[batch_id, 0, :, :], true_dy) + assert tuple(dy.shape) == (BATCH_SIZE, 1, HEIGHT, WIDTH) + assert tuple(dx.shape) == (BATCH_SIZE, 1, HEIGHT, WIDTH) + + +def test_image_gradients(): + """Test whether the module correctly calculates gradients for known input. + + Example input-output pair taken from TF's implementation of image- gradients + """ + + BATCH_SIZE = 1 + HEIGHT = 5 + WIDTH = 5 + CHANNELS = 1 + + image = B.arange(0, BATCH_SIZE * HEIGHT * WIDTH * CHANNELS, dtype=B.float32) + image = B.reshape(image, (BATCH_SIZE, CHANNELS, HEIGHT, WIDTH)) + + true_dy = [ + [5.0, 5.0, 5.0, 5.0, 5.0], + [5.0, 5.0, 5.0, 5.0, 5.0], + [5.0, 5.0, 5.0, 5.0, 5.0], + [5.0, 5.0, 5.0, 5.0, 5.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + ] + + true_dx = [ + [1.0, 1.0, 1.0, 1.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 0.0], + ] + + true_dy = B.to_tensor(true_dy) + true_dx = B.to_tensor(true_dx) + + dy, dx = image_gradients(image) + + assert B.allclose(dy.squeeze(), true_dy), "dy fails test" + assert B.allclose(dx.squeeze(), true_dx), "dx fails tests" diff --git a/RE/paddlemetric/src/tests/functional/test_reduction.py b/RE/paddlemetric/src/tests/functional/test_reduction.py new file mode 100644 index 00000000..729cd5b9 --- /dev/null +++ b/RE/paddlemetric/src/tests/functional/test_reduction.py @@ -0,0 +1,41 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import pytest +import paddleext.torchapi as B + +from paddlemetrics.utilities.distributed import class_reduce, reduce + + +def test_reduce(): + start_tensor = B.rand(50, 40, 30) + + assert B.allclose(reduce(start_tensor, "elementwise_mean"), B.mean(start_tensor)) + assert B.allclose(reduce(start_tensor, "sum"), B.sum(start_tensor)) + assert B.allclose(reduce(start_tensor, "none"), start_tensor) + + with pytest.raises(ValueError): + reduce(start_tensor, "error_reduction") + + +def test_class_reduce(): + num = B.randint(1, 10, (100,)).float() + denom = B.randint(10, 20, (100,)).float() + weights = B.randint(1, 100, (100,)).float() + + assert B.allclose(class_reduce(num, denom, weights, "micro"), B.sum(num) / B.sum(denom)) + assert B.allclose(class_reduce(num, denom, weights, "macro"), B.mean(num / denom)) + assert B.allclose( + class_reduce(num, denom, weights, "weighted"), B.sum(num / denom * (weights / B.sum(weights))) + ) + assert B.allclose(class_reduce(num, denom, weights, "none"), num / denom) diff --git a/RE/paddlemetric/src/tests/functional/test_self_supervised.py b/RE/paddlemetric/src/tests/functional/test_self_supervised.py new file mode 100644 index 00000000..4c675192 --- /dev/null +++ b/RE/paddlemetric/src/tests/functional/test_self_supervised.py @@ -0,0 +1,46 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
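+# embedding_similarity, checked against sklearn below, builds the pairwise
+# similarity matrix of the batch ("cosine" similarities or raw "dot" products);
+# reduction="mean" or "sum" then collapses each row of that matrix, mirroring the
+# sklearn_embedding_distance reference defined inside the test.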
+import pytest +import paddleext.torchapi as B +from sklearn.metrics import pairwise +from paddleext.torchapi import tensor + +from paddlemetrics.functional import embedding_similarity + + +@pytest.mark.parametrize("similarity", ["cosine", "dot"]) +@pytest.mark.parametrize("reduction", ["none", "mean", "sum"]) +def test_against_sklearn(similarity, reduction): + """Compare PL metrics to sklearn version.""" + device = "cuda" if B.cuda.is_available() else "cpu" + + batch = B.randn(5, 10, device=device) # 100 samples in 10 dimensions + + pl_dist = embedding_similarity(batch, similarity=similarity, reduction=reduction, zero_diagonal=False) + + def sklearn_embedding_distance(batch, similarity, reduction): + + metric_func = {"cosine": pairwise.cosine_similarity, "dot": pairwise.linear_kernel}[similarity] + + dist = metric_func(batch, batch) + if reduction == "mean": + return dist.mean(axis=-1) + if reduction == "sum": + return dist.sum(axis=-1) + return dist + + sk_dist = sklearn_embedding_distance(batch.cpu().detach().numpy(), similarity=similarity, reduction=reduction) + sk_dist = tensor(sk_dist, dtype=B.float, device=device) + + assert B.allclose(sk_dist, pl_dist) diff --git a/RE/paddlemetric/src/tests/helpers/__init__.py b/RE/paddlemetric/src/tests/helpers/__init__.py new file mode 100644 index 00000000..3773a49f --- /dev/null +++ b/RE/paddlemetric/src/tests/helpers/__init__.py @@ -0,0 +1,20 @@ +import operator +import random + +import numpy +import paddleext.torchapi as B + +from paddlemetrics.utilities.imports import _TORCH_LOWER_1_4, _TORCH_LOWER_1_5, _TORCH_LOWER_1_6, _compare_version + +_MARK_TORCH_MIN_1_4 = dict(condition=_TORCH_LOWER_1_4, reason="required PT >= 1.4") +_MARK_TORCH_MIN_1_5 = dict(condition=_TORCH_LOWER_1_5, reason="required PT >= 1.5") +_MARK_TORCH_MIN_1_6 = dict(condition=_TORCH_LOWER_1_6, reason="required PT >= 1.6") + +_LIGHTNING_GREATER_EQUAL_1_3 = _compare_version("pytorch_lightning", operator.ge, "1.3.0") + + +def seed_all(seed): + random.seed(seed) + numpy.random.seed(seed) + B.manual_seed(seed) + B.cuda.manual_seed_all(seed) diff --git a/RE/paddlemetric/src/tests/helpers/non_sklearn_metrics.py b/RE/paddlemetric/src/tests/helpers/non_sklearn_metrics.py new file mode 100644 index 00000000..fa4f8429 --- /dev/null +++ b/RE/paddlemetric/src/tests/helpers/non_sklearn_metrics.py @@ -0,0 +1,187 @@ +"""File for non sklearn metrics that are to be used for reference for tests.""" +from typing import Optional, Union + +import numpy as np +from sklearn.metrics._regression import _check_reg_targets +from sklearn.utils import assert_all_finite, check_consistent_length, column_or_1d + + +def symmetric_mean_absolute_percentage_error( + y_true: np.ndarray, + y_pred: np.ndarray, + sample_weight: Optional[np.ndarray] = None, + multioutput: str = "uniform_average", +): + r"""Symmetric mean absolute percentage error regression loss (SMAPE_): + + .. math:: \text{SMAPE} = \frac{2}{n}\sum_1^n\frac{max(| y_i - \hat{y_i} |}{| y_i | + | \hat{y_i} |, \epsilon)} + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a tensor of predictions. + + Args: + y_true: array-like of shape (n_samples,) or (n_samples, n_outputs) + Ground truth (correct) target values. + y_pred: array-like of shape (n_samples,) or (n_samples, n_outputs) + Estimated target values. + sample_weight: array-like of shape (n_samples,), default=None + Sample weights. + multioutput: {'raw_values', 'uniform_average'} or array-like + Defines aggregating of multiple output values. 
+ Array-like value defines weights used to average errors. + If input is list then the shape must be (n_outputs,). + + - 'raw_values': Returns a full set of errors in case of multioutput input. + - 'uniform_average': Errors of all outputs are averaged with uniform weight. + + Returns: + loss: float or ndarray of floats in the range [0, 1] + If multioutput is 'raw_values', then symmetric mean absolute percentage error + is returned for each output separately. + If multioutput is 'uniform_average' or an ndarray of weights, then the + weighted average of all output errors is returned. + MAPE output is non-negative floating point. The best value is 0.0. + But note the fact that bad predictions can lead to arbitarily large + MAPE values, especially if some y_true values are very close to zero. + Note that we return a large value instead of `inf` when y_true is zero. + + """ + _, y_true, y_pred, multioutput = _check_reg_targets(y_true, y_pred, multioutput) + check_consistent_length(y_true, y_pred, sample_weight) + epsilon = np.finfo(np.float64).eps + smape = 2 * np.abs(y_pred - y_true) / np.maximum(np.abs(y_true) + np.abs(y_pred), epsilon) + output_errors = np.average(smape, weights=sample_weight, axis=0) + if isinstance(multioutput, str): + if multioutput == "raw_values": + return output_errors + # pass None as weights to np.average: uniform mean + multioutput = None + + return np.average(output_errors, weights=multioutput) + + +# sklearn reference function from +# https://github.com/samronsin/scikit-learn/blob/calibration-loss/sklearn/metrics/_classification.py. +# TODO: when the PR into sklearn is accepted, update this to use the official function. +def calibration_error( + y_true: np.ndarray, + y_prob: np.ndarray, + sample_weight: Optional[np.ndarray] = None, + norm: str = "l2", + n_bins: int = 10, + strategy: str = "uniform", + pos_label: Optional[Union[int, str]] = None, + reduce_bias: bool = True, +) -> float: + """Compute calibration error of a binary classifier. Across all items in a set of N predictions, the + calibration error measures the aggregated difference between (1) the average predicted probabilities assigned + to the positive class, and (2) the frequencies of the positive class in the actual outcome. The calibration + error is only appropriate for binary categorical outcomes. Which label is considered to be the positive label + is controlled via the parameter pos_label, which defaults to 1. + + Args: + y_true: array-like of shape (n_samples,) + True targets of a binary classification task. + y_prob: array-like of (n_samples,) + Probabilities of the positive class. + sample_weight: array-like of shape (n_samples,) + norm: {'l1', 'l2', 'max'} + Norm method. The l1-norm is the Expected Calibration Error (ECE), + and the max-norm corresponds to Maximum Calibration Error (MCE). + n_bins: int, default=10 + The number of bins to compute error on. + strategy: {'uniform', 'quantile'} + Strategy used to define the widths of the bins. + uniform + All bins have identical widths. + quantile + All bins have the same number of points. + pos_label: int or str, default=None + Label of the positive class. If None, the maximum label is used as positive class. + reduce_bias: bool, default=True + Add debiasing term as in Verified Uncertainty Calibration, A. Kumar. + Only effective for the l2-norm. 
+ + Returns: + score: float with calibration error + """ + y_true = column_or_1d(y_true) + y_prob = column_or_1d(y_prob) + assert_all_finite(y_true) + assert_all_finite(y_prob) + check_consistent_length(y_true, y_prob, sample_weight) + if any(y_prob < 0) or any(y_prob > 1): + raise ValueError("y_prob has values outside of [0, 1] range") + + labels = np.unique(y_true) + if len(labels) > 2: + raise ValueError("Only binary classification is supported. " "Provided labels %s." % labels) + + if pos_label is None: + pos_label = y_true.max() + if pos_label not in labels: + raise ValueError("pos_label=%r is not a valid label: " "%r" % (pos_label, labels)) + y_true = np.array(y_true == pos_label, int) + + norm_options = ("l1", "l2", "max") + if norm not in norm_options: + raise ValueError(f"norm has to be one of {norm_options}, got: {norm}.") + + remapping = np.argsort(y_prob) + y_true = y_true[remapping] + y_prob = y_prob[remapping] + if sample_weight is not None: + sample_weight = sample_weight[remapping] + else: + sample_weight = np.ones(y_true.shape[0]) + + n_bins = int(n_bins) + if strategy == "quantile": + quantiles = np.percentile(y_prob, np.arange(0, 1, 1.0 / n_bins) * 100) + elif strategy == "uniform": + quantiles = np.arange(0, 1, 1.0 / n_bins) + else: + raise ValueError( + f"Invalid entry to 'strategy' input. \ + The strategy must be either quantile' or 'uniform'. Got {strategy} instead." + ) + + threshold_indices = np.searchsorted(y_prob, quantiles).tolist() + threshold_indices.append(y_true.shape[0]) + avg_pred_true = np.zeros(n_bins) + bin_centroid = np.zeros(n_bins) + delta_count = np.zeros(n_bins) + debias = np.zeros(n_bins) + + loss = 0.0 + count = float(sample_weight.sum()) + for i, i_start in enumerate(threshold_indices[:-1]): + i_end = threshold_indices[i + 1] + # ignore empty bins + if i_end == i_start: + continue + delta_count[i] = float(sample_weight[i_start:i_end].sum()) + avg_pred_true[i] = np.dot(y_true[i_start:i_end], sample_weight[i_start:i_end]) / delta_count[i] + bin_centroid[i] = np.dot(y_prob[i_start:i_end], sample_weight[i_start:i_end]) / delta_count[i] + if norm == "l2" and reduce_bias: + # NOTE: I think there's a mistake in the original implementation. + # delta_debias = ( + # avg_pred_true[i] * (avg_pred_true[i] - 1) * delta_count[i] + # ) + # delta_debias /= (count * delta_count[i] - 1) + delta_debias = avg_pred_true[i] * (avg_pred_true[i] - 1) * delta_count[i] + delta_debias /= count * (delta_count[i] - 1) + debias[i] = delta_debias + + if norm == "max": + loss = np.max(np.abs(avg_pred_true - bin_centroid)) + elif norm == "l1": + delta_loss = np.abs(avg_pred_true - bin_centroid) * delta_count + loss = np.sum(delta_loss) / count + elif norm == "l2": + delta_loss = (avg_pred_true - bin_centroid) ** 2 * delta_count + loss = np.sum(delta_loss) / count + if reduce_bias: + # convert nans to zero + loss += np.sum(np.nan_to_num(debias)) + loss = np.sqrt(max(loss, 0.0)) + return loss diff --git a/RE/paddlemetric/src/tests/helpers/testers.py b/RE/paddlemetric/src/tests/helpers/testers.py new file mode 100644 index 00000000..02f237c8 --- /dev/null +++ b/RE/paddlemetric/src/tests/helpers/testers.py @@ -0,0 +1,578 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import pickle +import sys +from functools import partial +from typing import Any, Callable, Dict, Optional, Sequence + +import numpy as np +import pytest +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor +from multiprocessing import Pool, set_start_method + +from paddlemetrics import Metric + +try: + set_start_method("spawn") +except RuntimeError: + pass + +NUM_PROCESSES = 2 +NUM_BATCHES = 10 +BATCH_SIZE = 32 +NUM_CLASSES = 5 +EXTRA_DIM = 3 +THRESHOLD = 0.5 + +MAX_PORT = 8100 +START_PORT = 8088 +CURRENT_PORT = START_PORT + + +def setup_ddp(rank, world_size): + """Setup ddp environment.""" + global CURRENT_PORT + + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = str(CURRENT_PORT) + + CURRENT_PORT += 1 + if CURRENT_PORT > MAX_PORT: + CURRENT_PORT = START_PORT + + if B.distributed.is_available() and sys.platform not in ("win32", "cygwin"): + B.distributed.init_process_group("gloo", rank=rank, world_size=world_size) + + +def _assert_allclose(pl_result: Any, sk_result: Any, atol: float = 1e-8, key: Optional[str] = None) -> None: + """Utility function for recursively asserting that two results are within a certain tolerance.""" + # single output compare + if isinstance(pl_result, Tensor): + assert np.allclose(pl_result.detach().cpu().numpy(), sk_result, atol=atol, equal_nan=True) + # multi output compare + elif isinstance(pl_result, Sequence): + for pl_res, sk_res in zip(pl_result, sk_result): + _assert_allclose(pl_res, sk_res, atol=atol) + elif isinstance(pl_result, Dict): + if key is None: + raise KeyError("Provide Key for Dict based metric results.") + assert np.allclose(pl_result[key].detach().cpu().numpy(), sk_result, atol=atol, equal_nan=True) + else: + raise ValueError("Unknown format for comparison") + + +def _assert_tensor(pl_result: Any, key: Optional[str] = None) -> None: + """Utility function for recursively checking that some input only consists of torch tensors.""" + if isinstance(pl_result, Sequence): + for plr in pl_result: + _assert_tensor(plr) + elif isinstance(pl_result, Dict): + if key is None: + raise KeyError("Provide Key for Dict based metric results.") + assert isinstance(pl_result[key], Tensor) + else: + assert isinstance(pl_result, Tensor) + + +def _assert_requires_grad(metric: Metric, pl_result: Any, key: Optional[str] = None) -> None: + """Utility function for recursively asserting that metric output is consistent with the `is_differentiable` + attribute.""" + if isinstance(pl_result, Sequence): + for plr in pl_result: + _assert_requires_grad(metric, plr, key=key) + elif isinstance(pl_result, Dict): + if key is None: + raise KeyError("Provide Key for Dict based metric results.") + assert metric.is_differentiable == pl_result[key].requires_grad + else: + assert metric.is_differentiable == pl_result.requires_grad, f"{metric.is_differentiable} vs {pl_result.requires_grad}" + + +def _class_test( + rank: int, + worldsize: int, + preds: Tensor, + target: Tensor, + metric_class: Metric, + sk_metric: Callable, + dist_sync_on_step: bool, + metric_args: dict = None, + check_dist_sync_on_step: bool = True, + 
check_batch: bool = True, + atol: float = 1e-8, + device: str = "cpu", + fragment_kwargs: bool = False, + check_scriptable: bool = True, + **kwargs_update: Any, +): + """Utility function doing the actual comparison between lightning class metric and reference metric. + + Args: + rank: rank of current process + worldsize: number of processes + preds: torch tensor with predictions + target: torch tensor with targets + metric_class: lightning metric class that should be tested + sk_metric: callable function that is used for comparison + dist_sync_on_step: bool, if true will synchronize metric state across + processes at each ``forward()`` + metric_args: dict with additional arguments used for class initialization + check_dist_sync_on_step: bool, if true will check if the metric is also correctly + calculated per batch per device (and not just at the end) + check_batch: bool, if true will check if the metric is also correctly + calculated across devices for each batch (and not just at the end) + device: determine which device to run on, either 'cuda' or 'cpu' + fragment_kwargs: whether tensors in kwargs should be divided as `preds` and `target` among processes + kwargs_update: Additional keyword arguments that will be passed with preds and + target when running update on the metric. + """ + assert preds.shape[0] == target.shape[0] + num_batches = preds.shape[0] + + if not metric_args: + metric_args = {} + + # Instantiate lightning metric + metric = metric_class( + compute_on_step=check_dist_sync_on_step or check_batch, dist_sync_on_step=dist_sync_on_step, **metric_args + ) + with pytest.raises(RuntimeError): + metric.is_differentiable = not metric.is_differentiable + with pytest.raises(RuntimeError): + metric.higher_is_better = not metric.higher_is_better + + # check that the metric is scriptable + # if check_scriptable: + # B.jit.script(metric) + + # move to device + metric = metric.to(device) + preds = preds.to(device) + target = target.to(device) + kwargs_update = {k: v.to(device) if isinstance(v, Tensor) else v for k, v in kwargs_update.items()} + + # verify metrics work after being loaded from pickled state +# pickled_metric = pickle.dumps(metric) +# metric = pickle.loads(pickled_metric) + + for i in range(rank, num_batches, worldsize): + batch_kwargs_update = {k: v[i] if isinstance(v, Tensor) else v for k, v in kwargs_update.items()} + + batch_result = metric(preds[i], target[i], **batch_kwargs_update) + + if metric.dist_sync_on_step and check_dist_sync_on_step and rank == 0: + ddp_preds = B.cat([preds[i + r] for r in range(worldsize)]).cpu() + ddp_target = B.cat([target[i + r] for r in range(worldsize)]).cpu() + ddp_kwargs_upd = { + k: B.cat([v[i + r] for r in range(worldsize)]).cpu() if isinstance(v, Tensor) else v + for k, v in (kwargs_update if fragment_kwargs else batch_kwargs_update).items() + } + + sk_batch_result = sk_metric(ddp_preds, ddp_target, **ddp_kwargs_upd) + _assert_allclose(batch_result, sk_batch_result, atol=atol) + + elif check_batch and not metric.dist_sync_on_step: + batch_kwargs_update = { + k: v.cpu() if isinstance(v, Tensor) else v + for k, v in (batch_kwargs_update if fragment_kwargs else kwargs_update).items() + } + sk_batch_result = sk_metric(preds[i].cpu(), target[i].cpu(), **batch_kwargs_update) + _assert_allclose(batch_result, sk_batch_result, atol=atol) + + # check that metrics are hashable + assert hash(metric) + + # check on all batches on all ranks + result = metric.compute() + _assert_tensor(result) + + total_preds = B.cat([preds[i] for i in 
range(num_batches)]).cpu() + total_target = B.cat([target[i] for i in range(num_batches)]).cpu() + total_kwargs_update = { + k: B.cat([v[i] for i in range(num_batches)]).cpu() if isinstance(v, Tensor) else v + for k, v in kwargs_update.items() + } + sk_result = sk_metric(total_preds, total_target, **total_kwargs_update) + + # assert after aggregation + _assert_allclose(result, sk_result, atol=atol) + + +def _functional_test( + preds: Tensor, + target: Tensor, + metric_functional: Callable, + sk_metric: Callable, + metric_args: dict = None, + atol: float = 1e-8, + device: str = "cpu", + fragment_kwargs: bool = False, + **kwargs_update, +): + """Utility function doing the actual comparison between lightning functional metric and reference metric. + + Args: + preds: torch tensor with predictions + target: torch tensor with targets + metric_functional: lightning metric functional that should be tested + sk_metric: callable function that is used for comparison + metric_args: dict with additional arguments used for class initialization + device: determine which device to run on, either 'cuda' or 'cpu' + fragment_kwargs: whether tensors in kwargs should be divided as `preds` and `target` among processes + kwargs_update: Additional keyword arguments that will be passed with preds and + target when running update on the metric. + """ + assert preds.shape[0] == target.shape[0] + num_batches = preds.shape[0] + + if not metric_args: + metric_args = {} + + metric = partial(metric_functional, **metric_args) + + # move to device + preds = preds.to(device) + target = target.to(device) + kwargs_update = {k: v.to(device) if isinstance(v, Tensor) else v for k, v in kwargs_update.items()} + + for i in range(num_batches): + extra_kwargs = {k: v[i] if isinstance(v, Tensor) else v for k, v in kwargs_update.items()} + lightning_result = metric(preds[i], target[i], **extra_kwargs) + extra_kwargs = { + k: v.cpu() if isinstance(v, Tensor) else v + for k, v in (extra_kwargs if fragment_kwargs else kwargs_update).items() + } + sk_result = sk_metric(preds[i].cpu(), target[i].cpu(), **extra_kwargs) + + # assert its the same + _assert_allclose(lightning_result, sk_result, atol=atol) + + +def _assert_half_support( + metric_module: Optional[Metric], + metric_functional: Optional[Callable], + preds: Tensor, + target: Tensor, + device: str = "cpu", + **kwargs_update, +): + """Test if an metric can be used with half precision tensors. + + Args: + metric_module: the metric module to test + metric_functional: the metric functional to test + preds: torch tensor with predictions + target: torch tensor with targets + device: determine device, either "cpu" or "cuda" + kwargs_update: Additional keyword arguments that will be passed with preds and + target when running update on the metric. + """ + y_hat = preds[0].half().to(device) if preds[0].is_floating_point() else preds[0].to(device) + y = target[0].half().to(device) if target[0].is_floating_point() else target[0].to(device) + kwargs_update = { + k: (v[0].half() if v.is_floating_point() else v[0]).to(device) if isinstance(v, Tensor) else v + for k, v in kwargs_update.items() + } + if metric_module is not None: + metric_module = metric_module.to(device) + _assert_tensor(metric_module(y_hat, y, **kwargs_update)) + if metric_functional is not None: + _assert_tensor(metric_functional(y_hat, y, **kwargs_update)) + + +gpu_device_name = "cuda" if B.platform() == "torch" else "gpu" + +class MetricTester: + """Class used for efficiently run alot of parametrized tests in ddp mode. 
Makes sure that ddp is only setup + once and that pool of processes are used for all tests. + + All tests should subclass from this and implement a new method called `test_metric_name` where the method + `self.run_metric_test` is called inside. + """ + + atol = 1e-8 + + def setup_class(self): + """Setup the metric class. + + This will spawn the pool of workers that are used for metric testing and setup_ddp + """ + + self.poolSize = NUM_PROCESSES + self.pool = Pool(processes=self.poolSize) + self.pool.starmap(setup_ddp, [(rank, self.poolSize) for rank in range(self.poolSize)]) + + def teardown_class(self): + """Close pool of workers.""" + self.pool.close() + self.pool.join() + + def run_functional_metric_test( + self, + preds: Tensor, + target: Tensor, + metric_functional: Callable, + sk_metric: Callable, + metric_args: dict = None, + fragment_kwargs: bool = False, + **kwargs_update, + ): + """Main method that should be used for testing functions. Call this inside testing method. + + Args: + preds: torch tensor with predictions + target: torch tensor with targets + metric_functional: lightning metric class that should be tested + sk_metric: callable function that is used for comparison + metric_args: dict with additional arguments used for class initialization + fragment_kwargs: whether tensors in kwargs should be divided as `preds` and `target` among processes + kwargs_update: Additional keyword arguments that will be passed with preds and + target when running update on the metric. + """ + device = gpu_device_name if (B.cuda.is_available() and B.cuda.device_count() > 0) else "cpu" + + _functional_test( + preds=preds, + target=target, + metric_functional=metric_functional, + sk_metric=sk_metric, + metric_args=metric_args, + atol=self.atol, + device=device, + fragment_kwargs=fragment_kwargs, + **kwargs_update, + ) + + def run_class_metric_test( + self, + ddp: bool, + preds: Tensor, + target: Tensor, + metric_class: Metric, + sk_metric: Callable, + dist_sync_on_step: bool, + metric_args: dict = None, + check_dist_sync_on_step: bool = True, + check_batch: bool = True, + fragment_kwargs: bool = False, + check_scriptable: bool = True, + **kwargs_update, + ): + """Main method that should be used for testing class. Call this inside testing methods. + + Args: + ddp: bool, if running in ddp mode or not + preds: torch tensor with predictions + target: torch tensor with targets + metric_class: lightning metric class that should be tested + sk_metric: callable function that is used for comparison + dist_sync_on_step: bool, if true will synchronize metric state across + processes at each ``forward()`` + metric_args: dict with additional arguments used for class initialization + check_dist_sync_on_step: bool, if true will check if the metric is also correctly + calculated per batch per device (and not just at the end) + check_batch: bool, if true will check if the metric is also correctly + calculated across devices for each batch (and not just at the end) + fragment_kwargs: whether tensors in kwargs should be divided as `preds` and `target` among processes + kwargs_update: Additional keyword arguments that will be passed with preds and + target when running update on the metric. 
+ """ + if not metric_args: + metric_args = {} + if ddp: + if sys.platform == "win32": + pytest.skip("DDP not supported on windows") + + self.pool.starmap( + partial( + _class_test, + preds=preds, + target=target, + metric_class=metric_class, + sk_metric=sk_metric, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + check_dist_sync_on_step=check_dist_sync_on_step, + check_batch=check_batch, + atol=self.atol, + fragment_kwargs=fragment_kwargs, + check_scriptable=check_scriptable, + **kwargs_update, + ), + [(rank, self.poolSize) for rank in range(self.poolSize)], + ) + else: + device = gpu_device_name if (B.cuda.is_available() and B.cuda.device_count() > 0) else "cpu" + + _class_test( + rank=0, + worldsize=1, + preds=preds, + target=target, + metric_class=metric_class, + sk_metric=sk_metric, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + check_dist_sync_on_step=check_dist_sync_on_step, + check_batch=check_batch, + atol=self.atol, + device=device, + fragment_kwargs=fragment_kwargs, + check_scriptable=check_scriptable, + **kwargs_update, + ) + + @staticmethod + def run_precision_test_cpu( + preds: Tensor, + target: Tensor, + metric_module: Optional[Metric] = None, + metric_functional: Optional[Callable] = None, + metric_args: Optional[dict] = None, + **kwargs_update, + ): + """Test if a metric can be used with half precision tensors on cpu + Args: + preds: torch tensor with predictions + target: torch tensor with targets + metric_module: the metric module to test + metric_functional: the metric functional to test + metric_args: dict with additional arguments used for class initialization + kwargs_update: Additional keyword arguments that will be passed with preds and + target when running update on the metric. + """ + metric_args = metric_args or {} + _assert_half_support( + metric_module(**metric_args) if metric_module is not None else None, + metric_functional, + preds, + target, + device="cpu", + **kwargs_update, + ) + + @staticmethod + def run_precision_test_gpu( + preds: Tensor, + target: Tensor, + metric_module: Optional[Metric] = None, + metric_functional: Optional[Callable] = None, + metric_args: Optional[dict] = None, + **kwargs_update, + ): + """Test if a metric can be used with half precision tensors on gpu + Args: + preds: torch tensor with predictions + target: torch tensor with targets + metric_module: the metric module to test + metric_functional: the metric functional to test + metric_args: dict with additional arguments used for class initialization + kwargs_update: Additional keyword arguments that will be passed with preds and + target when running update on the metric. + """ + metric_args = metric_args or {} + _assert_half_support( + metric_module(**metric_args) if metric_module is not None else None, + metric_functional, + preds, + target, + device=gpu_device_name, + **kwargs_update, + ) + + @staticmethod + def run_differentiability_test( + preds: Tensor, + target: Tensor, + metric_module: Metric, + metric_functional: Optional[Callable] = None, + metric_args: Optional[dict] = None, + ): + """Test if a metric is differentiable or not. 
+ + Args: + preds: torch tensor with predictions + target: torch tensor with targets + metric_module: the metric module to test + metric_args: dict with additional arguments used for class initialization + """ + metric_args = metric_args or {} + # only floating point tensors can require grad + metric = metric_module(**metric_args) + if preds.is_floating_point(): + preds.requires_grad = True + out = metric(preds[0], target[0]) + + # Check if requires_grad matches is_differentiable attribute + # _assert_requires_grad(metric, out) + + if metric.is_differentiable and metric_functional is not None: + # check for numerical correctness + assert B.autograd.gradcheck( + partial(metric_functional, **metric_args), (preds[0].double(), target[0]) + ) + + # reset as else it will carry over to other tests + preds.requires_grad = False + + +class DummyMetric(Metric): + name = "Dummy" + + def __init__(self): + super().__init__() + self.add_state("x", tensor(0.0), dist_reduce_fx=None) + + def update(self): + pass + + def compute(self): + pass + + +class DummyListMetric(Metric): + name = "DummyList" + + def __init__(self): + super().__init__() + self.add_state("x", [], dist_reduce_fx=None) + + def update(self): + pass + + def compute(self): + pass + + +class DummyMetricSum(DummyMetric): + def update(self, x): + self.x += x + + def compute(self): + return self.x + + +class DummyMetricDiff(DummyMetric): + def update(self, y): + self.x -= y + + def compute(self): + return self.x + + +class DummyMetricMultiOutput(DummyMetricSum): + def compute(self): + return [self.x, self.x] diff --git a/RE/paddlemetric/src/tests/image/__init__.py b/RE/paddlemetric/src/tests/image/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/image/test_fid.py b/RE/paddlemetric/src/tests/image/test_fid.py new file mode 100644 index 00000000..fe76a105 --- /dev/null +++ b/RE/paddlemetric/src/tests/image/test_fid.py @@ -0,0 +1,156 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
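The `Dummy*` classes that close the shared tester helpers above are stripped-down `Metric` subclasses used to exercise the state machinery (`add_state`, `update`, `compute`). For orientation, here is a minimal sketch of that same pattern outside the test suite; `RunningSum` is a hypothetical name, and the snippet assumes `paddlemetrics` and `paddleext` are importable on PYTHONPATH:

```python
import paddleext.torchapi as B
from paddlemetrics import Metric


class RunningSum(Metric):
    """Same add_state/update/compute pattern as the Dummy* helpers above."""

    def __init__(self):
        super().__init__()
        # single scalar state; "sum" lets distributed runs reduce partial states
        self.add_state("total", B.tensor(0.0), dist_reduce_fx="sum")

    def update(self, x):
        self.total += x

    def compute(self):
        return self.total


metric = RunningSum()
for value in (1.0, 2.0, 3.0):
    metric.update(B.tensor(value))
print(metric.compute())  # tensor(6.)
```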
+import pickle + +import pytest +import paddleext.torchapi as B +from scipy.linalg import sqrtm as scipy_sqrtm +from B.utils.data import Dataset + +from paddlemetrics.image.fid import FID, sqrtm +from paddlemetrics.utilities.imports import _TORCH_FIDELITY_AVAILABLE + +B.manual_seed(42) + + +@pytest.mark.parametrize("matrix_size", [2, 10, 100, 500]) +def test_matrix_sqrt(matrix_size): + """test that metrix sqrt function works as expected.""" + + def generate_cov(n): + data = B.randn(2 * n, n) + return (data - data.mean(dim=0)).T @ (data - data.mean(dim=0)) + + cov1 = generate_cov(matrix_size) + cov2 = generate_cov(matrix_size) + + scipy_res = scipy_sqrtm((cov1 @ cov2).numpy()).real + tm_res = sqrtm(cov1 @ cov2) + assert B.allclose(B.tensor(scipy_res).float(), tm_res, atol=1e-3) + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_no_train(): + """Assert that metric never leaves evaluation mode.""" + + class MyModel(B.nn.Module): + def __init__(self): + super().__init__() + self.metric = FID() + + def forward(self, x): + return x + + model = MyModel() + model.train() + assert model.training + assert not model.metric.inception.training, "FID metric was changed to training mode which should not happen" + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_fid_pickle(): + """Assert that we can initialize the metric and pickle it.""" + metric = FID() + assert metric + + # verify metrics work after being loaded from pickled state + pickled_metric = pickle.dumps(metric) + metric = pickle.loads(pickled_metric) + + +def test_fid_raises_errors_and_warnings(): + """Test that expected warnings and errors are raised.""" + with pytest.warns( + UserWarning, + match="Metric `FID` will save all extracted features in buffer." + " For large datasets this may lead to large memory footprint.", + ): + _ = FID() + + if _TORCH_FIDELITY_AVAILABLE: + with pytest.raises(ValueError, match="Integer input to argument `feature` must be one of .*"): + _ = FID(feature=2) + else: + with pytest.raises( + ValueError, + match="FID metric requires that Torch-fidelity is installed." 
+ "Either install as `pip install paddlemetrics[image-quality]`" + " or `pip install torch-fidelity`", + ): + _ = FID() + + with pytest.raises(TypeError, match="Got unknown input to argument `feature`"): + _ = FID(feature=[1, 2]) + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +@pytest.mark.parametrize("feature", [64, 192, 768, 2048]) +def test_fid_same_input(feature): + """if real and fake are update on the same data the fid score should be + 0.""" + metric = FID(feature=feature) + + for _ in range(2): + img = B.randint(0, 255, (10, 3, 299, 299), dtype=B.uint8) + metric.update(img, real=True) + metric.update(img, real=False) + + assert B.allclose(B.cat(metric.real_features, dim=0), B.cat(metric.fake_features, dim=0)) + + val = metric.compute() + assert B.allclose(val, B.zeros_like(val), atol=1e-3) + + +class _ImgDataset(Dataset): + def __init__(self, imgs): + self.imgs = imgs + + def __getitem__(self, idx): + return self.imgs[idx] + + def __len__(self): + return self.imgs.shape[0] + + +@pytest.mark.skipif(not B.cuda.is_available(), reason="test is too slow without gpu") +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_compare_fid(tmpdir, feature=2048): + """check that the hole pipeline give the same result as torch-fidelity.""" + from torch_fidelity import calculate_metrics + + metric = FID(feature=feature).cuda() + + # Generate some synthetic data + img1 = B.randint(0, 180, (100, 3, 299, 299), dtype=B.uint8) + img2 = B.randint(100, 255, (100, 3, 299, 299), dtype=B.uint8) + + batch_size = 10 + for i in range(img1.shape[0] // batch_size): + metric.update(img1[batch_size * i : batch_size * (i + 1)].cuda(), real=True) + + for i in range(img2.shape[0] // batch_size): + metric.update(img2[batch_size * i : batch_size * (i + 1)].cuda(), real=False) + + torch_fid = calculate_metrics( + input1=_ImgDataset(img1), + input2=_ImgDataset(img2), + fid=True, + feature_layer_fid=str(feature), + batch_size=batch_size, + save_cpu_ram=True, + ) + + tm_res = metric.compute() + + assert B.allclose(tm_res.cpu(), B.tensor([torch_fid["frechet_inception_distance"]]), atol=1e-3) diff --git a/RE/paddlemetric/src/tests/image/test_inception.py b/RE/paddlemetric/src/tests/image/test_inception.py new file mode 100644 index 00000000..4bfd5db6 --- /dev/null +++ b/RE/paddlemetric/src/tests/image/test_inception.py @@ -0,0 +1,125 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
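The image-quality tests below are all guarded by `_TORCH_FIDELITY_AVAILABLE`, so they are skipped when the optional torch-fidelity backend is not installed. As a rough sketch of how such a flag can be derived (the real definition lives in `paddlemetrics.utilities.imports` and may be implemented differently), one can probe for the package without importing it eagerly:

```python
import importlib.util

import pytest

# Assumed sketch: detect the optional dependency without importing it eagerly.
_TORCH_FIDELITY_AVAILABLE = importlib.util.find_spec("torch_fidelity") is not None


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
def test_requires_torch_fidelity():
    import torch_fidelity  # only imported when the package is actually installed

    assert torch_fidelity is not None
```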
+import pickle + +import pytest +import paddleext.torchapi as B +from B.utils.data import Dataset + +from paddlemetrics.image.inception import IS +from paddlemetrics.utilities.imports import _TORCH_FIDELITY_AVAILABLE + +B.manual_seed(42) + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_no_train(): + """Assert that metric never leaves evaluation mode.""" + + class MyModel(B.nn.Module): + def __init__(self): + super().__init__() + self.metric = IS() + + def forward(self, x): + return x + + model = MyModel() + model.train() + assert model.training + assert not model.metric.inception.training, "IS metric was changed to training mode which should not happen" + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_is_pickle(): + """Assert that we can initialize the metric and pickle it.""" + metric = IS() + assert metric + + # verify metrics work after being loaded from pickled state + pickled_metric = pickle.dumps(metric) + metric = pickle.loads(pickled_metric) + + +def test_is_raises_errors_and_warnings(): + """Test that expected warnings and errors are raised.""" + with pytest.warns( + UserWarning, + match="Metric `IS` will save all extracted features in buffer." + " For large datasets this may lead to large memory footprint.", + ): + IS() + + if _TORCH_FIDELITY_AVAILABLE: + with pytest.raises(ValueError, match="Integer input to argument `feature` must be one of .*"): + _ = IS(feature=2) + else: + with pytest.raises( + ValueError, + match="IS metric requires that Torch-fidelity is installed." + "Either install as `pip install paddlemetrics[image-quality]`" + " or `pip install torch-fidelity`", + ): + IS() + + with pytest.raises(TypeError, match="Got unknown input to argument `feature`"): + IS(feature=[1, 2]) + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_is_update_compute(): + metric = IS() + + for _ in range(2): + img = B.randint(0, 255, (10, 3, 299, 299), dtype=B.uint8) + metric.update(img) + + mean, std = metric.compute() + assert mean >= 0.0 + assert std >= 0.0 + + +class _ImgDataset(Dataset): + def __init__(self, imgs): + self.imgs = imgs + + def __getitem__(self, idx): + return self.imgs[idx] + + def __len__(self): + return self.imgs.shape[0] + + +@pytest.mark.skipif(not B.cuda.is_available(), reason="test is too slow without gpu") +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_compare_is(tmpdir): + """check that the hole pipeline give the same result as torch-fidelity.""" + from torch_fidelity import calculate_metrics + + metric = IS(splits=1).cuda() + + # Generate some synthetic data + img1 = B.randint(0, 255, (100, 3, 299, 299), dtype=B.uint8) + + batch_size = 10 + for i in range(img1.shape[0] // batch_size): + metric.update(img1[batch_size * i : batch_size * (i + 1)].cuda()) + + torch_fid = calculate_metrics( + input1=_ImgDataset(img1), isc=True, isc_splits=1, batch_size=batch_size, save_cpu_ram=True + ) + + tm_mean, _ = metric.compute() + + assert B.allclose(tm_mean.cpu(), B.tensor([torch_fid["inception_score_mean"]]), atol=1e-3) diff --git a/RE/paddlemetric/src/tests/image/test_kid.py b/RE/paddlemetric/src/tests/image/test_kid.py new file mode 100644 index 00000000..586c5f04 --- /dev/null +++ b/RE/paddlemetric/src/tests/image/test_kid.py @@ -0,0 +1,166 @@ +# Copyright The PyTorch Lightning team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pickle + +import pytest +import paddleext.torchapi as B +from B.utils.data import Dataset + +from paddlemetrics.image.kid import KID +from paddlemetrics.utilities.imports import _TORCH_FIDELITY_AVAILABLE + +B.manual_seed(42) + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_no_train(): + """Assert that metric never leaves evaluation mode.""" + + class MyModel(B.nn.Module): + def __init__(self): + super().__init__() + self.metric = KID() + + def forward(self, x): + return x + + model = MyModel() + model.train() + assert model.training + assert not model.metric.inception.training, "FID metric was changed to training mode which should not happen" + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_kid_pickle(): + """Assert that we can initialize the metric and pickle it.""" + metric = KID() + assert metric + + # verify metrics work after being loaded from pickled state + pickled_metric = pickle.dumps(metric) + metric = pickle.loads(pickled_metric) + + +def test_kid_raises_errors_and_warnings(): + """Test that expected warnings and errors are raised.""" + with pytest.warns( + UserWarning, + match="Metric `KID` will save all extracted features in buffer." + " For large datasets this may lead to large memory footprint.", + ): + KID() + + if _TORCH_FIDELITY_AVAILABLE: + with pytest.raises(ValueError, match="Integer input to argument `feature` must be one of .*"): + KID(feature=2) + else: + with pytest.raises( + ValueError, + match="KID metric requires that Torch-fidelity is installed." 
+ "Either install as `pip install paddlemetrics[image]`" + " or `pip install torch-fidelity`", + ): + KID() + + with pytest.raises(TypeError, match="Got unknown input to argument `feature`"): + KID(feature=[1, 2]) + + with pytest.raises(ValueError, match="Argument `subset_size` should be smaller than the number of samples"): + m = KID() + m.update(B.randint(0, 255, (5, 3, 299, 299), dtype=B.uint8), real=True) + m.update(B.randint(0, 255, (5, 3, 299, 299), dtype=B.uint8), real=False) + m.compute() + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_kid_extra_parameters(): + with pytest.raises(ValueError, match="Argument `subsets` expected to be integer larger than 0"): + KID(subsets=-1) + + with pytest.raises(ValueError, match="Argument `subset_size` expected to be integer larger than 0"): + KID(subset_size=-1) + + with pytest.raises(ValueError, match="Argument `degree` expected to be integer larger than 0"): + KID(degree=-1) + + with pytest.raises(ValueError, match="Argument `gamma` expected to be `None` or float larger than 0"): + KID(gamma=-1) + + with pytest.raises(ValueError, match="Argument `coef` expected to be float larger than 0"): + KID(coef=-1) + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +@pytest.mark.parametrize("feature", [64, 192, 768, 2048]) +def test_kid_same_input(feature): + """test that the metric works.""" + metric = KID(feature=feature, subsets=5, subset_size=2) + + for _ in range(2): + img = B.randint(0, 255, (10, 3, 299, 299), dtype=B.uint8) + metric.update(img, real=True) + metric.update(img, real=False) + + assert B.allclose(B.cat(metric.real_features, dim=0), B.cat(metric.fake_features, dim=0)) + + mean, std = metric.compute() + assert mean != 0.0 + assert std >= 0.0 + + +class _ImgDataset(Dataset): + def __init__(self, imgs): + self.imgs = imgs + + def __getitem__(self, idx): + return self.imgs[idx] + + def __len__(self): + return self.imgs.shape[0] + + +@pytest.mark.skipif(not B.cuda.is_available(), reason="test is too slow without gpu") +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_compare_kid(tmpdir, feature=2048): + """check that the hole pipeline give the same result as torch-fidelity.""" + from torch_fidelity import calculate_metrics + + metric = KID(feature=feature, subsets=1, subset_size=100).cuda() + + # Generate some synthetic data + img1 = B.randint(0, 180, (100, 3, 299, 299), dtype=B.uint8) + img2 = B.randint(100, 255, (100, 3, 299, 299), dtype=B.uint8) + + batch_size = 10 + for i in range(img1.shape[0] // batch_size): + metric.update(img1[batch_size * i : batch_size * (i + 1)].cuda(), real=True) + + for i in range(img2.shape[0] // batch_size): + metric.update(img2[batch_size * i : batch_size * (i + 1)].cuda(), real=False) + + torch_fid = calculate_metrics( + input1=_ImgDataset(img1), + input2=_ImgDataset(img2), + kid=True, + feature_layer_fid=str(feature), + batch_size=batch_size, + kid_subsets=1, + kid_subset_size=100, + save_cpu_ram=True, + ) + + tm_mean, tm_std = metric.compute() + + assert B.allclose(tm_mean.cpu(), B.tensor([torch_fid["kernel_inception_distance_mean"]]), atol=1e-3) + assert B.allclose(tm_std.cpu(), B.tensor([torch_fid["kernel_inception_distance_std"]]), atol=1e-3) diff --git a/RE/paddlemetric/src/tests/image/test_lpips.py b/RE/paddlemetric/src/tests/image/test_lpips.py new file mode 100644 index 00000000..fd3e83a1 --- /dev/null +++ 
b/RE/paddlemetric/src/tests/image/test_lpips.py @@ -0,0 +1,103 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple +from functools import partial + +import pytest +import paddleext.torchapi as B +from lpips import LPIPS as reference_LPIPS +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.image.lpip_similarity import LPIPS +from paddlemetrics.utilities.imports import _LPIPS_AVAILABLE + +seed_all(42) + +Input = namedtuple("Input", ["img1", "img2"]) + +_inputs = Input( + img1=B.rand(int(NUM_BATCHES * 0.4), int(BATCH_SIZE / 16), 3, 100, 100), + img2=B.rand(int(NUM_BATCHES * 0.4), int(BATCH_SIZE / 16), 3, 100, 100), +) + + +def _compare_fn(img1: Tensor, img2: Tensor, net_type: str, reduction: str = "mean") -> Tensor: + """comparison function for tm implementation.""" + ref = reference_LPIPS(net=net_type) + res = ref(img1, img2).detach().cpu().numpy() + if reduction == "mean": + return res.mean() + return res.sum() + + +@pytest.mark.skipif(not _LPIPS_AVAILABLE, reason="test requires that lpips is installed") +@pytest.mark.parametrize("net_type", ["vgg", "alex", "squeeze"]) +class TestLPIPS(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + def test_lpips(self, net_type, ddp): + """test modular implementation for correctness.""" + self.run_class_metric_test( + ddp=ddp, + preds=_inputs.img1, + target=_inputs.img2, + metric_class=LPIPS, + sk_metric=partial(_compare_fn, net_type=net_type), + dist_sync_on_step=False, + check_scriptable=False, + metric_args={"net_type": net_type}, + ) + + def test_lpips_differentiability(self, net_type): + """test for differentiability of LPIPS metric.""" + self.run_differentiability_test(preds=_inputs.img1, target=_inputs.img2, metric_module=LPIPS) + + # LPIPS half + cpu does not work due to missing support in B.min + @pytest.mark.xfail(reason="PearsonCorrcoef metric does not support cpu + half precision") + def test_lpips_half_cpu(self, net_type): + """test for half + cpu support.""" + self.run_precision_test_cpu(_inputs.img1, _inputs.img2, LPIPS) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_lpips_half_gpu(self, net_type): + """test for half + gpu support.""" + self.run_precision_test_gpu(_inputs.img1, _inputs.img2, LPIPS) + + +@pytest.mark.skipif(not _LPIPS_AVAILABLE, reason="test requires that lpips is installed") +def test_error_on_wrong_init(): + """Test class raises the expected errors.""" + with pytest.raises(ValueError, match="Argument `net_type` must be one .*"): + LPIPS(net_type="resnet") + + with pytest.raises(ValueError, match="Argument `reduction` must be one .*"): + LPIPS(reduction=None) + + +@pytest.mark.skipif(not _LPIPS_AVAILABLE, reason="test requires that lpips is installed") +@pytest.mark.parametrize( + "inp1, inp2", + [ + (B.rand(1, 1, 28, 28), B.rand(1, 3, 28, 28)), # wrong number of channels + 
(B.rand(1, 3, 28, 28), B.rand(1, 1, 28, 28)), # wrong number of channels + (B.randn(1, 3, 28, 28), B.rand(1, 3, 28, 28)), # non-normalized input + (B.rand(1, 3, 28, 28), B.randn(1, 3, 28, 28)), # non-normalized input + ], +) +def test_error_on_wrong_update(inp1, inp2): + """test error is raised on wrong input to update method.""" + metric = LPIPS() + with pytest.raises(ValueError, match="Expected both input arguments to be normalized tensors .*"): + metric(inp1, inp2) diff --git a/RE/paddlemetric/src/tests/image/test_psnr.py b/RE/paddlemetric/src/tests/image/test_psnr.py new file mode 100644 index 00000000..cc8b857a --- /dev/null +++ b/RE/paddlemetric/src/tests/image/test_psnr.py @@ -0,0 +1,149 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import namedtuple +from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from skimage.metrics import peak_signal_noise_ratio + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional import psnr +from paddlemetrics.image import PSNR + +seed_all(42) + +Input = namedtuple("Input", ["preds", "target"]) + +_input_size = (NUM_BATCHES, BATCH_SIZE, 32, 32) +_inputs = [ + Input( + preds=B.randint(n_cls_pred, _input_size, dtype=B.float), + target=B.randint(n_cls_target, _input_size, dtype=B.float), + ) + for n_cls_pred, n_cls_target in [(10, 10), (5, 10), (10, 5)] +] + + +def _to_sk_peak_signal_noise_ratio_inputs(value, dim): + value = value.numpy() + batches = value[None] if value.ndim == len(_input_size) - 1 else value + + if dim is None: + return [batches] + + num_dims = np.size(dim) + if not num_dims: + return batches + + inputs = [] + for batch in batches: + batch = np.moveaxis(batch, dim, np.arange(-num_dims, 0)) + psnr_input_shape = batch.shape[-num_dims:] + inputs.extend(batch.reshape(-1, *psnr_input_shape)) + return inputs + + +def _sk_psnr(preds, target, data_range, reduction, dim): + sk_preds_lists = _to_sk_peak_signal_noise_ratio_inputs(preds, dim=dim) + sk_target_lists = _to_sk_peak_signal_noise_ratio_inputs(target, dim=dim) + np_reduce_map = {"elementwise_mean": np.mean, "none": np.array, "sum": np.sum} + return np_reduce_map[reduction]( + [ + peak_signal_noise_ratio(sk_target, sk_preds, data_range=data_range) + for sk_target, sk_preds in zip(sk_target_lists, sk_preds_lists) + ] + ) + + +def _base_e_sk_psnr(preds, target, data_range, reduction, dim): + return _sk_psnr(preds, target, data_range, reduction, dim) * np.log(10) + + +@pytest.mark.parametrize( + "preds, target, data_range, reduction, dim", + [ + (_inputs[0].preds, _inputs[0].target, 10, "elementwise_mean", None), + (_inputs[1].preds, _inputs[1].target, 10, "elementwise_mean", None), + (_inputs[2].preds, _inputs[2].target, 5, "elementwise_mean", None), + (_inputs[2].preds, _inputs[2].target, 5, "elementwise_mean", 1), + (_inputs[2].preds, _inputs[2].target, 5, "elementwise_mean", (1, 2)), + 
(_inputs[2].preds, _inputs[2].target, 5, "sum", (1, 2)), + ], +) +@pytest.mark.parametrize( + "base, sk_metric", + [ + (10.0, _sk_psnr), + (2.718281828459045, _base_e_sk_psnr), + ], +) +class TestPSNR(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_psnr(self, preds, target, data_range, base, reduction, dim, sk_metric, ddp, dist_sync_on_step): + _args = {"data_range": data_range, "base": base, "reduction": reduction, "dim": dim} + self.run_class_metric_test( + ddp, + preds, + target, + PSNR, + partial(sk_metric, data_range=data_range, reduction=reduction, dim=dim), + metric_args=_args, + dist_sync_on_step=dist_sync_on_step, + ) + + def test_psnr_functional(self, preds, target, sk_metric, data_range, base, reduction, dim): + _args = {"data_range": data_range, "base": base, "reduction": reduction, "dim": dim} + self.run_functional_metric_test( + preds, + target, + psnr, + partial(sk_metric, data_range=data_range, reduction=reduction, dim=dim), + metric_args=_args, + ) + + # PSNR half + cpu does not work due to missing support in B.log + @pytest.mark.xfail(reason="PSNR metric does not support cpu + half precision") + def test_psnr_half_cpu(self, preds, target, data_range, reduction, dim, base, sk_metric): + self.run_precision_test_cpu( + preds, target, PSNR, psnr, {"data_range": data_range, "base": base, "reduction": reduction, "dim": dim} + ) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_psnr_half_gpu(self, preds, target, data_range, reduction, dim, base, sk_metric): + self.run_precision_test_gpu( + preds, target, PSNR, psnr, {"data_range": data_range, "base": base, "reduction": reduction, "dim": dim} + ) + + +@pytest.mark.parametrize("reduction", ["none", "sum"]) +def test_reduction_for_dim_none(reduction): + match = f"The `reduction={reduction}` will not have any effect when `dim` is None." + with pytest.warns(UserWarning, match=match): + PSNR(reduction=reduction, dim=None) + + with pytest.warns(UserWarning, match=match): + psnr(_inputs[0].preds, _inputs[0].target, reduction=reduction, dim=None) + + +def test_missing_data_range(): + with pytest.raises(ValueError): + PSNR(data_range=None, dim=0) + + with pytest.raises(ValueError): + psnr(_inputs[0].preds, _inputs[0].target, data_range=None, dim=0) diff --git a/RE/paddlemetric/src/tests/image/test_ssim.py b/RE/paddlemetric/src/tests/image/test_ssim.py new file mode 100644 index 00000000..d249db0d --- /dev/null +++ b/RE/paddlemetric/src/tests/image/test_ssim.py @@ -0,0 +1,167 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
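Both the PSNR tests above and the SSIM tests below pin the paddlemetrics result to the skimage reference implementation on identical data. A self-contained version of that cross-check for PSNR looks roughly like this; it assumes `scikit-image` and `paddlemetrics` are installed, and the tolerance is only illustrative:

```python
import numpy as np
import paddleext.torchapi as B
from skimage.metrics import peak_signal_noise_ratio
from paddlemetrics.functional import psnr

preds = B.rand(8, 3, 32, 32)
target = B.rand(8, 3, 32, 32)

# paddlemetrics result vs. the skimage reference on the same data
tm_value = psnr(preds, target, data_range=1.0)
sk_value = peak_signal_noise_ratio(target.numpy(), preds.numpy(), data_range=1.0)
assert np.isclose(tm_value.item(), sk_value, atol=1e-4)
```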
+from collections import namedtuple +from functools import partial + +import pytest +import paddleext.torchapi as B +from skimage.metrics import structural_similarity + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional import ssim +from paddlemetrics.image import SSIM + +seed_all(42) + +Input = namedtuple("Input", ["preds", "target", "multichannel"]) + +_inputs = [] +for size, channel, coef, multichannel, dtype in [ + (12, 3, 0.9, True, B.float), + (13, 1, 0.8, False, B.float32), + (14, 1, 0.7, False, B.double), + (15, 3, 0.6, True, B.float64), +]: + preds = B.rand(NUM_BATCHES, BATCH_SIZE, channel, size, size, dtype=dtype) + _inputs.append( + Input( + preds=preds, + target=preds * coef, + multichannel=multichannel, + ) + ) + + +def _sk_ssim(preds, target, data_range, multichannel, kernel_size): + c, h, w = preds.shape[-3:] + sk_preds = preds.view(-1, c, h, w).permute(0, 2, 3, 1).numpy() + sk_target = target.view(-1, c, h, w).permute(0, 2, 3, 1).numpy() + if not multichannel: + sk_preds = sk_preds[:, :, :, 0] + sk_target = sk_target[:, :, :, 0] + + return structural_similarity( + sk_target, + sk_preds, + data_range=data_range, + multichannel=multichannel, + gaussian_weights=True, + win_size=kernel_size, + sigma=1.5, + use_sample_covariance=False, + ) + + +@pytest.mark.parametrize( + "preds, target, multichannel", + [(i.preds, i.target, i.multichannel) for i in _inputs], +) +@pytest.mark.parametrize("kernel_size", [5, 11]) +class TestSSIM(MetricTester): + atol = 6e-3 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_ssim(self, preds, target, multichannel, kernel_size, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + SSIM, + partial(_sk_ssim, data_range=1.0, multichannel=multichannel, kernel_size=kernel_size), + metric_args={"data_range": 1.0, "kernel_size": (kernel_size, kernel_size)}, + dist_sync_on_step=dist_sync_on_step, + ) + + def test_ssim_functional(self, preds, target, multichannel, kernel_size): + self.run_functional_metric_test( + preds, + target, + ssim, + partial(_sk_ssim, data_range=1.0, multichannel=multichannel, kernel_size=kernel_size), + metric_args={"data_range": 1.0, "kernel_size": (kernel_size, kernel_size)}, + ) + + # SSIM half + cpu does not work due to missing support in B.log + @pytest.mark.xfail(reason="SSIM metric does not support cpu + half precision") + def test_ssim_half_cpu(self, preds, target, multichannel, kernel_size): + self.run_precision_test_cpu(preds, target, SSIM, ssim, {"data_range": 1.0}) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_ssim_half_gpu(self, preds, target, multichannel, kernel_size): + self.run_precision_test_gpu(preds, target, SSIM, ssim, {"data_range": 1.0}) + + +@pytest.mark.parametrize( + ["pred", "target", "kernel", "sigma"], + [ + pytest.param([1, 16, 16], [1, 16, 16], [11, 11], [1.5, 1.5]), # len(shape) + pytest.param([1, 1, 16, 16], [1, 1, 16, 16], [11, 11], [1.5]), # len(kernel), len(sigma) + pytest.param([1, 1, 16, 16], [1, 1, 16, 16], [11], [1.5, 1.5]), # len(kernel), len(sigma) + pytest.param([1, 1, 16, 16], [1, 1, 16, 16], [11], [1.5]), # len(kernel), len(sigma) + pytest.param([1, 1, 16, 16], [1, 1, 16, 16], [11, 0], [1.5, 1.5]), # invalid kernel input + pytest.param([1, 1, 16, 16], [1, 1, 16, 16], [11, 10], [1.5, 1.5]), # invalid kernel input + pytest.param([1, 1, 16, 16], [1, 1, 16, 
16], [11, -11], [1.5, 1.5]), # invalid kernel input + pytest.param([1, 1, 16, 16], [1, 1, 16, 16], [11, 11], [1.5, 0]), # invalid sigma input + pytest.param([1, 1, 16, 16], [1, 1, 16, 16], [11, 0], [1.5, -1.5]), # invalid sigma input + ], +) +def test_ssim_invalid_inputs(pred, target, kernel, sigma): + pred_t = B.rand(pred) + target_t = B.rand(target, dtype=B.float64) + with pytest.raises(TypeError): + ssim(pred_t, target_t) + + pred = B.rand(pred) + target = B.rand(target) + with pytest.raises(ValueError): + ssim(pred, target, kernel, sigma) + + +def test_ssim_unequal_kernel_size(): + """Test the case where kernel_size[0] != kernel_size[1]""" + preds = B.tensor( + [ + [ + [ + [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], + [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], + [0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0], + [0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], + [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], + ] + ] + ] + ) + target = B.tensor( + [ + [ + [ + [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], + [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0], + [1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], + [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], + [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], + [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0], + [0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0], + ] + ] + ] + ) + # kernel order matters + assert ssim(preds, target, kernel_size=(3, 5)) == B.tensor(0.10814697) + assert ssim(preds, target, kernel_size=(5, 3)) != B.tensor(0.10814697) diff --git a/RE/paddlemetric/src/tests/pairwise/__init__.py b/RE/paddlemetric/src/tests/pairwise/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/pairwise/test_pairwise_distance.py b/RE/paddlemetric/src/tests/pairwise/test_pairwise_distance.py new file mode 100644 index 00000000..5a642a60 --- /dev/null +++ b/RE/paddlemetric/src/tests/pairwise/test_pairwise_distance.py @@ -0,0 +1,121 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
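The pairwise tests below compare each paddlemetrics functional against its sklearn counterpart after flattening the batch dimension. A standalone version of that comparison for cosine similarity, assuming `scikit-learn` and `paddlemetrics` are installed (shapes are arbitrary):

```python
import numpy as np
import paddleext.torchapi as B
from sklearn.metrics.pairwise import cosine_similarity
from paddlemetrics.functional import pairwise_cosine_similarity

x = B.rand(10, 5)
y = B.rand(12, 5)

tm_res = pairwise_cosine_similarity(x, y)          # pairwise matrix, shape [10, 12]
sk_res = cosine_similarity(x.numpy(), y.numpy())   # same matrix from sklearn
assert np.allclose(tm_res.numpy(), sk_res, atol=1e-4)
```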
+from collections import namedtuple +from functools import partial + +import pytest +import paddleext.torchapi as B +from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances, linear_kernel, manhattan_distances + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional import ( + pairwise_cosine_similarity, + pairwise_euclidean_distance, + pairwise_linear_similarity, + pairwise_manhatten_distance, +) +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_7 + +seed_all(42) + +extra_dim = 5 + +Input = namedtuple("Input", ["x", "y"]) + + +_inputs1 = Input( + x=B.rand(NUM_BATCHES, BATCH_SIZE, extra_dim), + y=B.rand(NUM_BATCHES, BATCH_SIZE, extra_dim), +) + + +_inputs2 = Input( + x=B.rand(NUM_BATCHES, BATCH_SIZE, extra_dim), + y=B.rand(NUM_BATCHES, BATCH_SIZE, extra_dim), +) + + +def _sk_metric(x, y, sk_fn, reduction): + """comparison function.""" + x = x.view(-1, extra_dim).numpy() + y = y.view(-1, extra_dim).numpy() + res = sk_fn(x, y) + if reduction == "sum": + return res.sum(axis=-1) + elif reduction == "mean": + return res.mean(axis=-1) + return res + + +@pytest.mark.parametrize( + "x, y", + [ + (_inputs1.x, _inputs1.y), + (_inputs2.x, _inputs2.y), + ], +) +@pytest.mark.parametrize( + "metric_functional, sk_fn", + [ + (pairwise_cosine_similarity, cosine_similarity), + (pairwise_euclidean_distance, euclidean_distances), + (pairwise_manhatten_distance, manhattan_distances), + (pairwise_linear_similarity, linear_kernel), + ], +) +@pytest.mark.parametrize("reduction", ["sum", "mean", None]) +class TestPairwise(MetricTester): + """test pairwise implementations.""" + + atol = 1e-4 + + def test_pairwise_functional(self, x, y, metric_functional, sk_fn, reduction): + """test functional pairwise implementations.""" + self.run_functional_metric_test( + preds=x, + target=y, + metric_functional=metric_functional, + sk_metric=partial(_sk_metric, sk_fn=sk_fn, reduction=reduction), + metric_args={"reduction": reduction}, + ) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_7, reason="half support of core operations on not support before pytorch v1.7" + ) + def test_pairwise_half_cpu(self, x, y, metric_functional, sk_fn, reduction): + """test half precision support on cpu.""" + if metric_functional == pairwise_euclidean_distance: + pytest.xfail("pairwise_euclidean_distance metric does not support cpu + half precision") + self.run_precision_test_cpu(x, y, None, metric_functional, metric_args={"reduction": reduction}) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_pairwise_half_gpu(self, x, y, metric_functional, sk_fn, reduction): + """test half precision support on gpu.""" + self.run_precision_test_gpu(x, y, None, metric_functional, metric_args={"reduction": reduction}) + + +@pytest.mark.parametrize( + "metric", [pairwise_cosine_similarity, pairwise_euclidean_distance, pairwise_manhatten_distance] +) +def test_error_on_wrong_shapes(metric): + """Test errors are raised on wrong input.""" + with pytest.raises(ValueError, match="Expected argument `x` to be a 2D tensor .*"): + metric(B.randn(10)) + + with pytest.raises(ValueError, match="Expected argument `y` to be a 2D tensor .*"): + metric(B.randn(10, 5), B.randn(5, 3)) + + with pytest.raises(ValueError, match="Expected reduction to be one of .*"): + metric(B.randn(10, 5), B.randn(10, 5), reduction=1) diff --git a/RE/paddlemetric/src/tests/regression/__init__.py 
b/RE/paddlemetric/src/tests/regression/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/regression/test_cosine_similarity.py b/RE/paddlemetric/src/tests/regression/test_cosine_similarity.py new file mode 100644 index 00000000..0821cc11 --- /dev/null +++ b/RE/paddlemetric/src/tests/regression/test_cosine_similarity.py @@ -0,0 +1,111 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple +from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics.pairwise import cosine_similarity as sk_cosine + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional.regression.cosine_similarity import cosine_similarity +from paddlemetrics.regression.cosine_similarity import CosineSimilarity + +seed_all(42) + +num_targets = 5 + +Input = namedtuple("Input", ["preds", "target"]) + +_single_target_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.rand(NUM_BATCHES, BATCH_SIZE), +) + +_multi_target_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), + target=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), +) + + +def _multi_target_sk_metric(preds, target, reduction, sk_fn=sk_cosine): + sk_preds = preds.view(-1, num_targets).numpy() + sk_target = target.view(-1, num_targets).numpy() + result_array = sk_fn(sk_target, sk_preds) + col = np.diagonal(result_array) + col_sum = col.sum() + if reduction == "sum": + to_return = col_sum + elif reduction == "mean": + mean = col_sum / len(col) + to_return = mean + else: + to_return = col + return to_return + + +def _single_target_sk_metric(preds, target, reduction, sk_fn=sk_cosine): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + result_array = sk_fn(np.expand_dims(sk_preds, axis=0), np.expand_dims(sk_target, axis=0)) + col = np.diagonal(result_array) + col_sum = col.sum() + if reduction == "sum": + to_return = col_sum + elif reduction == "mean": + mean = col_sum / len(col) + to_return = mean + else: + to_return = col + return to_return + + +@pytest.mark.parametrize("reduction", ["sum", "mean"]) +@pytest.mark.parametrize( + "preds, target, sk_metric", + [ + (_single_target_inputs.preds, _single_target_inputs.target, _single_target_sk_metric), + (_multi_target_inputs.preds, _multi_target_inputs.target, _multi_target_sk_metric), + ], +) +class TestCosineSimilarity(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_cosine_similarity(self, reduction, preds, target, sk_metric, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + CosineSimilarity, + partial(sk_metric, reduction=reduction), + dist_sync_on_step, + metric_args=dict(reduction=reduction), + ) + + def test_cosine_similarity_functional(self, reduction, preds, target, sk_metric): + 
self.run_functional_metric_test( + preds, + target, + cosine_similarity, + partial(sk_metric, reduction=reduction), + metric_args=dict(reduction=reduction), + ) + + +def test_error_on_different_shape(metric_class=CosineSimilarity): + metric = metric_class() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) diff --git a/RE/paddlemetric/src/tests/regression/test_explained_variance.py b/RE/paddlemetric/src/tests/regression/test_explained_variance.py new file mode 100644 index 00000000..a227d0d4 --- /dev/null +++ b/RE/paddlemetric/src/tests/regression/test_explained_variance.py @@ -0,0 +1,110 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple +from functools import partial + +import pytest +import paddleext.torchapi as B +from sklearn.metrics import explained_variance_score + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional import explained_variance +from paddlemetrics.regression import ExplainedVariance +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_6 + +seed_all(42) + +num_targets = 5 + +Input = namedtuple("Input", ["preds", "target"]) + +_single_target_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.rand(NUM_BATCHES, BATCH_SIZE), +) + +_multi_target_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), + target=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), +) + + +def _single_target_sk_metric(preds, target, sk_fn=explained_variance_score): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + return sk_fn(sk_target, sk_preds) + + +def _multi_target_sk_metric(preds, target, sk_fn=explained_variance_score): + sk_preds = preds.view(-1, num_targets).numpy() + sk_target = target.view(-1, num_targets).numpy() + return sk_fn(sk_target, sk_preds) + + +@pytest.mark.parametrize("multioutput", ["raw_values", "uniform_average", "variance_weighted"]) +@pytest.mark.parametrize( + "preds, target, sk_metric", + [ + (_single_target_inputs.preds, _single_target_inputs.target, _single_target_sk_metric), + (_multi_target_inputs.preds, _multi_target_inputs.target, _multi_target_sk_metric), + ], +) +class TestExplainedVariance(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_explained_variance(self, multioutput, preds, target, sk_metric, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + ExplainedVariance, + partial(sk_metric, sk_fn=partial(explained_variance_score, multioutput=multioutput)), + dist_sync_on_step, + metric_args=dict(multioutput=multioutput), + ) + + def test_explained_variance_functional(self, multioutput, preds, target, sk_metric): + self.run_functional_metric_test( + preds, + target, + explained_variance, + partial(sk_metric, 
sk_fn=partial(explained_variance_score, multioutput=multioutput)), + metric_args=dict(multioutput=multioutput), + ) + + def test_explained_variance_differentiability(self, multioutput, preds, target, sk_metric): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=ExplainedVariance, + metric_functional=explained_variance, + metric_args={"multioutput": multioutput}, + ) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_6, reason="half support of core operations on not support before pytorch v1.6" + ) + def test_explained_variance_half_cpu(self, multioutput, preds, target, sk_metric): + self.run_precision_test_cpu(preds, target, ExplainedVariance, explained_variance) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_explained_variance_half_gpu(self, multioutput, preds, target, sk_metric): + self.run_precision_test_gpu(preds, target, ExplainedVariance, explained_variance) + + +def test_error_on_different_shape(metric_class=ExplainedVariance): + metric = metric_class() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) diff --git a/RE/paddlemetric/src/tests/regression/test_mean_error.py b/RE/paddlemetric/src/tests/regression/test_mean_error.py new file mode 100644 index 00000000..b9d9a31e --- /dev/null +++ b/RE/paddlemetric/src/tests/regression/test_mean_error.py @@ -0,0 +1,177 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
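+# --- Editorial sketch (not part of the original suite) -----------------------
+# The sklearn reference functions above all follow the same pattern: flatten the
+# (NUM_BATCHES, BATCH_SIZE, ...) tensors, convert to numpy, and delegate to the
+# sklearn counterpart, because sklearn metrics have no notion of batching. The
+# helper below is a hypothetical, self-contained illustration of that pattern
+# using plain numpy instead of the B backend shim.
+def _example_flattened_sklearn_reference():
+    import numpy as np
+    from sklearn.metrics import explained_variance_score
+    preds = np.random.rand(4, 8)    # stands in for B.rand(NUM_BATCHES, BATCH_SIZE)
+    target = np.random.rand(4, 8)
+    # flatten batches into one long vector before calling sklearn
+    return explained_variance_score(target.reshape(-1), preds.reshape(-1))
+# -----------------------------------------------------------------------------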
+import math +from collections import namedtuple +from functools import partial + +import pytest +import paddleext.torchapi as B +from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error +from sklearn.metrics import mean_absolute_percentage_error as sk_mean_abs_percentage_error +from sklearn.metrics import mean_squared_error as sk_mean_squared_error +from sklearn.metrics import mean_squared_log_error as sk_mean_squared_log_error + +from tests.helpers import seed_all +from tests.helpers.non_sklearn_metrics import ( + symmetric_mean_absolute_percentage_error as sk_sym_mean_abs_percentage_error, +) +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional import ( + mean_absolute_error, + mean_absolute_percentage_error, + mean_squared_error, + mean_squared_log_error, +) +from paddlemetrics.functional.regression.symmetric_mean_absolute_percentage_error import ( + symmetric_mean_absolute_percentage_error, +) +from paddlemetrics.regression import ( + MeanAbsoluteError, + MeanAbsolutePercentageError, + MeanSquaredError, + MeanSquaredLogError, +) +from paddlemetrics.regression.symmetric_mean_absolute_percentage_error import SymmetricMeanAbsolutePercentageError +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_6 + +seed_all(42) + +num_targets = 5 + +Input = namedtuple("Input", ["preds", "target"]) + +_single_target_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.rand(NUM_BATCHES, BATCH_SIZE), +) + +_multi_target_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), + target=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), +) + + +def _single_target_sk_metric(preds, target, sk_fn, metric_args): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + # `sk_target` and `sk_preds` switched to fix failing tests. + # For more info, check https://github.com/PyTorchLightning/metrics/pull/248#issuecomment-841232277 + res = sk_fn(sk_target, sk_preds) + + return math.sqrt(res) if (metric_args and not metric_args["squared"]) else res + + +def _multi_target_sk_metric(preds, target, sk_fn, metric_args): + sk_preds = preds.view(-1, num_targets).numpy() + sk_target = target.view(-1, num_targets).numpy() + + # `sk_target` and `sk_preds` switched to fix failing tests. 
+ # For more info, check https://github.com/PyTorchLightning/metrics/pull/248#issuecomment-841232277 + res = sk_fn(sk_target, sk_preds) + + return math.sqrt(res) if (metric_args and not metric_args["squared"]) else res + + +@pytest.mark.parametrize( + "preds, target, sk_metric", + [ + (_single_target_inputs.preds, _single_target_inputs.target, _single_target_sk_metric), + (_multi_target_inputs.preds, _multi_target_inputs.target, _multi_target_sk_metric), + ], +) +@pytest.mark.parametrize( + "metric_class, metric_functional, sk_fn, metric_args", + [ + (MeanSquaredError, mean_squared_error, sk_mean_squared_error, {"squared": True}), + (MeanSquaredError, mean_squared_error, sk_mean_squared_error, {"squared": False}), + (MeanAbsoluteError, mean_absolute_error, sk_mean_absolute_error, {}), + (MeanAbsolutePercentageError, mean_absolute_percentage_error, sk_mean_abs_percentage_error, {}), + ( + SymmetricMeanAbsolutePercentageError, + symmetric_mean_absolute_percentage_error, + sk_sym_mean_abs_percentage_error, + {}, + ), + (MeanSquaredLogError, mean_squared_log_error, sk_mean_squared_log_error, {}), + ], +) +class TestMeanError(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_mean_error_class( + self, preds, target, sk_metric, metric_class, metric_functional, sk_fn, metric_args, ddp, dist_sync_on_step + ): + # todo: `metric_functional` is unused + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=metric_class, + sk_metric=partial(sk_metric, sk_fn=sk_fn, metric_args=metric_args), + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + ) + + def test_mean_error_functional(self, preds, target, sk_metric, metric_class, metric_functional, sk_fn, metric_args): + # todo: `metric_class` is unused + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=metric_functional, + sk_metric=partial(sk_metric, sk_fn=sk_fn, metric_args=metric_args), + metric_args=metric_args, + ) + + def test_mean_error_differentiability( + self, preds, target, sk_metric, metric_class, metric_functional, sk_fn, metric_args + ): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=metric_class, + metric_functional=metric_functional, + metric_args=metric_args, + ) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_6, reason="half support of core operations on not support before pytorch v1.6" + ) + def test_mean_error_half_cpu(self, preds, target, sk_metric, metric_class, metric_functional, sk_fn, metric_args): + if metric_class == MeanSquaredLogError: + # MeanSquaredLogError half + cpu does not work due to missing support in B.log + pytest.xfail("MeanSquaredLogError metric does not support cpu + half precision") + + if metric_class == MeanAbsolutePercentageError: + # MeanSquaredPercentageError half + cpu does not work due to missing support in B.log + pytest.xfail("MeanSquaredPercentageError metric does not support cpu + half precision") + + if metric_class == SymmetricMeanAbsolutePercentageError: + # MeanSquaredPercentageError half + cpu does not work due to missing support in B.log + pytest.xfail("SymmetricMeanAbsolutePercentageError metric does not support cpu + half precision") + + self.run_precision_test_cpu(preds, target, metric_class, metric_functional) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_mean_error_half_gpu(self, preds, target, sk_metric, metric_class, metric_functional, 
sk_fn, metric_args): + self.run_precision_test_gpu(preds, target, metric_class, metric_functional) + + +@pytest.mark.parametrize( + "metric_class", [MeanSquaredError, MeanAbsoluteError, MeanSquaredLogError, MeanAbsolutePercentageError] +) +def test_error_on_different_shape(metric_class): + metric = metric_class() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) diff --git a/RE/paddlemetric/src/tests/regression/test_pearson.py b/RE/paddlemetric/src/tests/regression/test_pearson.py new file mode 100644 index 00000000..09e1ac21 --- /dev/null +++ b/RE/paddlemetric/src/tests/regression/test_pearson.py @@ -0,0 +1,93 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple + +import pytest +import paddleext.torchapi as B +from scipy.stats import pearsonr + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional.regression.pearson import pearson_corrcoef +from paddlemetrics.regression.pearson import PearsonCorrcoef + +seed_all(42) + +Input = namedtuple("Input", ["preds", "target"]) + +_single_target_inputs1 = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.rand(NUM_BATCHES, BATCH_SIZE), +) + +_single_target_inputs2 = Input( + preds=B.randn(NUM_BATCHES, BATCH_SIZE), + target=B.randn(NUM_BATCHES, BATCH_SIZE), +) + + +def _sk_pearsonr(preds, target): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + return pearsonr(sk_target, sk_preds)[0] + + +@pytest.mark.parametrize( + "preds, target", + [ + (_single_target_inputs1.preds, _single_target_inputs1.target), + (_single_target_inputs2.preds, _single_target_inputs2.target), + ], +) +class TestPearsonCorrcoef(MetricTester): + atol = 1e-2 + + @pytest.mark.parametrize("ddp", [True, False]) + def test_pearson_corrcoef(self, preds, target, ddp): + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=PearsonCorrcoef, + sk_metric=_sk_pearsonr, + dist_sync_on_step=False, + ) + + def test_pearson_corrcoef_functional(self, preds, target): + self.run_functional_metric_test( + preds=preds, target=target, metric_functional=pearson_corrcoef, sk_metric=_sk_pearsonr + ) + + def test_pearson_corrcoef_differentiability(self, preds, target): + self.run_differentiability_test( + preds=preds, target=target, metric_module=PearsonCorrcoef, metric_functional=pearson_corrcoef + ) + + # Pearson half + cpu does not work due to missing support in B.sqrt + @pytest.mark.xfail(reason="PearsonCorrcoef metric does not support cpu + half precision") + def test_pearson_corrcoef_half_cpu(self, preds, target): + self.run_precision_test_cpu(preds, target, PearsonCorrcoef, pearson_corrcoef) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_pearson_corrcoef_half_gpu(self, preds, target): + self.run_precision_test_gpu(preds, target, 
PearsonCorrcoef, pearson_corrcoef) + + +def test_error_on_different_shape(): + metric = PearsonCorrcoef() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) + + with pytest.raises(ValueError, match="Expected both predictions and target to be 1 dimensional tensors."): + metric(B.randn(100, 2), B.randn(100, 2)) diff --git a/RE/paddlemetric/src/tests/regression/test_r2.py b/RE/paddlemetric/src/tests/regression/test_r2.py new file mode 100644 index 00000000..ebed636a --- /dev/null +++ b/RE/paddlemetric/src/tests/regression/test_r2.py @@ -0,0 +1,164 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple +from functools import partial + +import pytest +import paddleext.torchapi as B +from sklearn.metrics import r2_score as sk_r2score + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional import r2_score +from paddlemetrics.regression import R2Score +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_6 + +seed_all(42) + +num_targets = 5 + +Input = namedtuple("Input", ["preds", "target"]) + +_single_target_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.rand(NUM_BATCHES, BATCH_SIZE), +) + +_multi_target_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), + target=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), +) + + +def _single_target_sk_metric(preds, target, adjusted, multioutput): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + r2_score = sk_r2score(sk_target, sk_preds, multioutput=multioutput) + if adjusted != 0: + r2_score = 1 - (1 - r2_score) * (sk_preds.shape[0] - 1) / (sk_preds.shape[0] - adjusted - 1) + return r2_score + + +def _multi_target_sk_metric(preds, target, adjusted, multioutput): + sk_preds = preds.view(-1, num_targets).numpy() + sk_target = target.view(-1, num_targets).numpy() + r2_score = sk_r2score(sk_target, sk_preds, multioutput=multioutput) + if adjusted != 0: + r2_score = 1 - (1 - r2_score) * (sk_preds.shape[0] - 1) / (sk_preds.shape[0] - adjusted - 1) + return r2_score + + +@pytest.mark.parametrize("adjusted", [0, 5, 10]) +@pytest.mark.parametrize("multioutput", ["raw_values", "uniform_average", "variance_weighted"]) +@pytest.mark.parametrize( + "preds, target, sk_metric, num_outputs", + [ + (_single_target_inputs.preds, _single_target_inputs.target, _single_target_sk_metric, 1), + (_multi_target_inputs.preds, _multi_target_inputs.target, _multi_target_sk_metric, num_targets), + ], +) +class TestR2Score(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_r2(self, adjusted, multioutput, preds, target, sk_metric, num_outputs, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + R2Score, + partial(sk_metric, adjusted=adjusted, 
multioutput=multioutput), + dist_sync_on_step, + metric_args=dict(adjusted=adjusted, multioutput=multioutput, num_outputs=num_outputs), + ) + + def test_r2_functional(self, adjusted, multioutput, preds, target, sk_metric, num_outputs): + # todo: `num_outputs` is unused + self.run_functional_metric_test( + preds, + target, + r2_score, + partial(sk_metric, adjusted=adjusted, multioutput=multioutput), + metric_args=dict(adjusted=adjusted, multioutput=multioutput), + ) + + def test_r2_differentiability(self, adjusted, multioutput, preds, target, sk_metric, num_outputs): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=partial(R2Score, num_outputs=num_outputs), + metric_functional=r2_score, + metric_args=dict(adjusted=adjusted, multioutput=multioutput), + ) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_6, reason="half support of core operations on not support before pytorch v1.6" + ) + def test_r2_half_cpu(self, adjusted, multioutput, preds, target, sk_metric, num_outputs): + self.run_precision_test_cpu( + preds, + target, + partial(R2Score, num_outputs=num_outputs), + r2_score, + {"adjusted": adjusted, "multioutput": multioutput}, + ) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_r2_half_gpu(self, adjusted, multioutput, preds, target, sk_metric, num_outputs): + self.run_precision_test_gpu( + preds, + target, + partial(R2Score, num_outputs=num_outputs), + r2_score, + {"adjusted": adjusted, "multioutput": multioutput}, + ) + + +def test_error_on_different_shape(metric_class=R2Score): + metric = metric_class() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) + + +def test_error_on_multidim_tensors(metric_class=R2Score): + metric = metric_class() + with pytest.raises( + ValueError, + match=r"Expected both prediction and target to be 1D or 2D tensors," r" but received tensors with dimension .", + ): + metric(B.randn(10, 20, 5), B.randn(10, 20, 5)) + + +def test_error_on_too_few_samples(metric_class=R2Score): + metric = metric_class() + with pytest.raises(ValueError, match="Needs at least two samples to calculate r2 score."): + metric(B.randn(1), B.randn(1)) + metric.reset() + + # calling update twice should still work + metric.update(B.randn(1), B.randn(1)) + metric.update(B.randn(1), B.randn(1)) + assert metric.compute() + + +def test_warning_on_too_large_adjusted(metric_class=R2Score): + metric = metric_class(adjusted=10) + + with pytest.warns( + UserWarning, + match="More independent regressions than data points in" " adjusted r2 score. Falls back to standard r2 score.", + ): + metric(B.randn(10), B.randn(10)) + + with pytest.warns(UserWarning, match="Division by zero in adjusted r2 score. Falls back to" " standard r2 score."): + metric(B.randn(11), B.randn(11)) diff --git a/RE/paddlemetric/src/tests/regression/test_spearman.py b/RE/paddlemetric/src/tests/regression/test_spearman.py new file mode 100644 index 00000000..a3764fde --- /dev/null +++ b/RE/paddlemetric/src/tests/regression/test_spearman.py @@ -0,0 +1,115 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple + +import pytest +import paddleext.torchapi as B +from scipy.stats import rankdata, spearmanr + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional.regression.spearman import _rank_data, spearman_corrcoef +from paddlemetrics.regression.spearman import SpearmanCorrcoef + +seed_all(42) + +Input = namedtuple("Input", ["preds", "target"]) + +_single_target_inputs1 = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.rand(NUM_BATCHES, BATCH_SIZE), +) + +_single_target_inputs2 = Input( + preds=B.randn(NUM_BATCHES, BATCH_SIZE), + target=B.randn(NUM_BATCHES, BATCH_SIZE), +) + +_specific_input = Input( + preds=B.stack([B.tensor([1.0, 0.0, 4.0, 1.0, 0.0, 3.0, 0.0]) for _ in range(NUM_BATCHES)]), + target=B.stack([B.tensor([4.0, 0.0, 3.0, 3.0, 3.0, 1.0, 1.0]) for _ in range(NUM_BATCHES)]), +) + + +@pytest.mark.parametrize( + "preds, target", + [ + (_single_target_inputs1.preds, _single_target_inputs1.target), + (_single_target_inputs2.preds, _single_target_inputs2.target), + (_specific_input.preds, _specific_input.target), + ], +) +def test_ranking(preds, target): + """test that ranking function works as expected.""" + for p, t in zip(preds, target): + scipy_ranking = [rankdata(p.numpy()), rankdata(t.numpy())] + tm_ranking = [_rank_data(p), _rank_data(t)] + assert (B.tensor(scipy_ranking[0]) == tm_ranking[0]).all() + assert (B.tensor(scipy_ranking[1]) == tm_ranking[1]).all() + + +def _sk_metric(preds, target): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + return spearmanr(sk_target, sk_preds)[0] + + +@pytest.mark.parametrize( + "preds, target", + [ + (_single_target_inputs1.preds, _single_target_inputs1.target), + (_single_target_inputs2.preds, _single_target_inputs2.target), + (_specific_input.preds, _specific_input.target), + ], +) +class TestSpearmanCorrcoef(MetricTester): + atol = 1e-2 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_spearman_corrcoef(self, preds, target, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + SpearmanCorrcoef, + _sk_metric, + dist_sync_on_step, + ) + + def test_spearman_corrcoef_functional(self, preds, target): + self.run_functional_metric_test(preds, target, spearman_corrcoef, _sk_metric) + + def test_spearman_corrcoef_differentiability(self, preds, target): + self.run_differentiability_test( + preds=preds, target=target, metric_module=SpearmanCorrcoef, metric_functional=spearman_corrcoef + ) + + # Spearman half + cpu does not work due to missing support in B.arange + @pytest.mark.xfail(reason="Spearman metric does not support cpu + half precision") + def test_spearman_corrcoef_half_cpu(self, preds, target): + self.run_precision_test_cpu(preds, target, SpearmanCorrcoef, spearman_corrcoef) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_spearman_corrcoef_half_gpu(self, preds, target): + self.run_precision_test_gpu(preds, target, SpearmanCorrcoef, 
spearman_corrcoef) + + +def test_error_on_different_shape(): + metric = SpearmanCorrcoef() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) + + with pytest.raises(ValueError, match="Expected both predictions and target to be 1 dimensional tensors."): + metric(B.randn(100, 2), B.randn(100, 2)) diff --git a/RE/paddlemetric/src/tests/regression/test_tweedie_deviance.py b/RE/paddlemetric/src/tests/regression/test_tweedie_deviance.py new file mode 100644 index 00000000..af130313 --- /dev/null +++ b/RE/paddlemetric/src/tests/regression/test_tweedie_deviance.py @@ -0,0 +1,140 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple +from functools import partial + +import pytest +import paddleext.torchapi as B +from sklearn.metrics import mean_tweedie_deviance +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional.regression.tweedie_deviance import tweedie_deviance_score +from paddlemetrics.regression.tweedie_deviance import TweedieDevianceScore + +seed_all(42) + +Input = namedtuple("Input", ["preds", "targets"]) + +_single_target_inputs1 = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + targets=B.rand(NUM_BATCHES, BATCH_SIZE), +) + +_single_target_inputs2 = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + targets=B.rand(NUM_BATCHES, BATCH_SIZE), +) + +_multi_target_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, 5), + targets=B.rand(NUM_BATCHES, BATCH_SIZE, 5), +) + + +def _sk_deviance(preds: Tensor, targets: Tensor, power: float): + sk_preds = preds.view(-1).numpy() + sk_target = targets.view(-1).numpy() + return mean_tweedie_deviance(sk_target, sk_preds, power=power) + + +@pytest.mark.parametrize("power", [-0.5, 0, 1, 1.5, 2, 3]) +@pytest.mark.parametrize( + "preds, targets", + [ + (_single_target_inputs1.preds, _single_target_inputs1.targets), + (_single_target_inputs2.preds, _single_target_inputs2.targets), + (_multi_target_inputs.preds, _multi_target_inputs.targets), + ], +) +class TestDevianceScore(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_deviance_scores_class(self, ddp, dist_sync_on_step, preds, targets, power): + self.run_class_metric_test( + ddp, + preds, + targets, + TweedieDevianceScore, + partial(_sk_deviance, power=power), + dist_sync_on_step, + metric_args=dict(power=power), + ) + + def test_deviance_scores_functional(self, preds, targets, power): + self.run_functional_metric_test( + preds, + targets, + tweedie_deviance_score, + partial(_sk_deviance, power=power), + metric_args=dict(power=power), + ) + + def test_pearson_corrcoef_differentiability(self, preds, targets, power): + self.run_differentiability_test( + preds, targets, metric_module=TweedieDevianceScore, 
metric_functional=tweedie_deviance_score + ) + + # Tweedie Deviance Score half + cpu does not work due to missing support in B.log + @pytest.mark.xfail(reason="TweedieDevianceScore metric does not support cpu + half precision") + def test_pearson_corrcoef_half_cpu(self, preds, targets, power): + metric_args = {"power": power} + self.run_precision_test_cpu( + preds, + targets, + metric_module=TweedieDevianceScore, + metric_functional=tweedie_deviance_score, + metric_args=metric_args, + ) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_pearson_corrcoef_half_gpu(self, preds, targets, power): + metric_args = {"power": power} + self.run_precision_test_gpu( + preds, + targets, + metric_module=TweedieDevianceScore, + metric_functional=tweedie_deviance_score, + metric_args=metric_args, + ) + + +def test_error_on_different_shape(metric_class=TweedieDevianceScore): + metric = metric_class() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) + + +def test_error_on_invalid_inputs(metric_class=TweedieDevianceScore): + with pytest.raises(ValueError, match="Deviance Score is not defined for power=0.5."): + metric_class(power=0.5) + + metric = metric_class(power=1) + with pytest.raises( + ValueError, match="For power=1, 'preds' has to be strictly positive and 'targets' cannot be negative." + ): + metric(B.tensor([-1.0, 2.0, 3.0]), B.rand(3)) + + with pytest.raises( + ValueError, match="For power=1, 'preds' has to be strictly positive and 'targets' cannot be negative." + ): + metric(B.rand(3), B.tensor([-1.0, 2.0, 3.0])) + + metric = metric_class(power=2) + with pytest.raises(ValueError, match="For power=2, both 'preds' and 'targets' have to be strictly positive."): + metric(B.tensor([-1.0, 2.0, 3.0]), B.rand(3)) + + with pytest.raises(ValueError, match="For power=2, both 'preds' and 'targets' have to be strictly positive."): + metric(B.rand(3), B.tensor([-1.0, 2.0, 3.0])) diff --git a/RE/paddlemetric/src/tests/retrieval/__init__.py b/RE/paddlemetric/src/tests/retrieval/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/retrieval/helpers.py b/RE/paddlemetric/src/tests/retrieval/helpers.py new file mode 100644 index 00000000..419fe19b --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/helpers.py @@ -0,0 +1,511 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
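+# --- Editorial sketch (not part of the original suite) -----------------------
+# The Tweedie deviance tests above delegate to sklearn's mean_tweedie_deviance,
+# where `power` selects the distribution family (0 behaves like squared error,
+# 1 like Poisson, 2 like Gamma); powers in (0, 1) are undefined, which is what
+# the power=0.5 error test asserts. A hypothetical, self-contained illustration:
+def _example_tweedie_reference():
+    import numpy as np
+    from sklearn.metrics import mean_tweedie_deviance
+    preds = np.array([0.5, 1.0, 2.0])
+    target = np.array([1.0, 1.0, 2.0])
+    # power=1 needs preds > 0 and target >= 0; power=2 needs both strictly positive
+    return {p: mean_tweedie_deviance(target, preds, power=p) for p in (0, 1, 2)}
+# -----------------------------------------------------------------------------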
+from functools import partial +from typing import Callable, Dict, List, Tuple, Type, Union + +import numpy as np +import pytest +import paddleext.torchapi as B +from numpy import array +from paddleext.torchapi import Tensor, tensor + +from tests.helpers import seed_all +from tests.helpers.testers import Metric, MetricTester +from tests.retrieval.inputs import _input_retrieval_scores as _irs +from tests.retrieval.inputs import _input_retrieval_scores_all_target as _irs_all +from tests.retrieval.inputs import _input_retrieval_scores_empty as _irs_empty +from tests.retrieval.inputs import _input_retrieval_scores_extra as _irs_extra +from tests.retrieval.inputs import _input_retrieval_scores_float_target as _irs_float_tgt +from tests.retrieval.inputs import _input_retrieval_scores_int_target as _irs_int_tgt +from tests.retrieval.inputs import _input_retrieval_scores_mismatching_sizes as _irs_mis_sz +from tests.retrieval.inputs import _input_retrieval_scores_mismatching_sizes_func as _irs_mis_sz_fn +from tests.retrieval.inputs import _input_retrieval_scores_no_target as _irs_no_tgt +from tests.retrieval.inputs import _input_retrieval_scores_wrong_targets as _irs_bad_tgt + +seed_all(42) + +# a version of get_group_indexes that depends on NumPy is here to avoid this dependency for the full library + + +def get_group_indexes(indexes: Union[Tensor, np.ndarray]) -> List[Union[Tensor, np.ndarray]]: + """Given an integer `B.Tensor` or `np.ndarray` `indexes`, return a `B.Tensor` or `np.ndarray` of + indexes for each different value in `indexes`. + + Args: + indexes: a `B.Tensor` or `np.ndarray` of integers + + Return: + A list of integer `B.Tensor`s or `np.ndarray`s + + Example: + >>> indexes = B.tensor([0, 0, 0, 1, 1, 1, 1]) + >>> get_group_indexes(indexes) + [tensor([0, 1, 2]), tensor([3, 4, 5, 6])] + """ + structure, dtype = (tensor, B.long) if isinstance(indexes, Tensor) else (np.array, np.int64) + + res = {} + for i, _id in enumerate(indexes): + _id = _id.item() + if _id in res: + res[_id] += [i] + else: + res[_id] = [i] + + return [structure(x, dtype=dtype) for x in res.values()] + + +def _compute_sklearn_metric( + preds: Union[Tensor, array], + target: Union[Tensor, array], + indexes: np.ndarray = None, + metric: Callable = None, + empty_target_action: str = "skip", + reverse: bool = False, + **kwargs, +) -> Tensor: + """Compute metric with multiple iterations over every query predictions set.""" + + if indexes is None: + indexes = np.full_like(preds, fill_value=0, dtype=np.int64) + if isinstance(indexes, Tensor): + indexes = indexes.cpu().numpy() + if isinstance(preds, Tensor): + preds = preds.cpu().numpy() + if isinstance(target, Tensor): + target = target.cpu().numpy() + + assert isinstance(indexes, np.ndarray) + assert isinstance(preds, np.ndarray) + assert isinstance(target, np.ndarray) + + indexes = indexes.flatten() + preds = preds.flatten() + target = target.flatten() + groups = get_group_indexes(indexes) + + sk_results = [] + for group in groups: + trg, pds = target[group], preds[group] + + if ((1 - trg) if reverse else trg).sum() == 0: + if empty_target_action == "skip": + pass + elif empty_target_action == "pos": + sk_results.append(1.0) + else: + sk_results.append(0.0) + else: + res = metric(trg, pds, **kwargs) + sk_results.append(res) + + if len(sk_results) > 0: + return np.mean(sk_results) + return np.array(0.0) + + +def _concat_tests(*tests: Tuple[Dict]) -> Dict: + """Concat tests composed by a string and a list of arguments.""" + assert len(tests), "`_concat_tests` expects at 
least an argument" + assert all(tests[0]["argnames"] == x["argnames"] for x in tests[1:]), "the header must be the same for all tests" + return dict(argnames=tests[0]["argnames"], argvalues=sum((x["argvalues"] for x in tests), [])) + + +_errors_test_functional_metric_parameters_default = dict( + argnames="preds,target,message,metric_args", + argvalues=[ + # check input shapes are consistent (func) + (_irs_mis_sz_fn.preds, _irs_mis_sz_fn.target, "`preds` and `target` must be of the same shape", {}), + # check input tensors are not empty + (_irs_empty.preds, _irs_empty.target, "`preds` and `target` must be non-empty and non-scalar tensors", {}), + # check on input dtypes + (_irs.preds.bool(), _irs.target, "`preds` must be a tensor of floats", {}), + # check targets are between 0 and 1 + (_irs_bad_tgt.preds, _irs_bad_tgt.target, "`target` must contain `binary` values", {}), + ], +) + +_errors_test_functional_metric_parameters_with_nonbinary = dict( + argnames="preds,target,message,metric_args", + argvalues=[ + # check input shapes are consistent (func) + (_irs_mis_sz_fn.preds, _irs_mis_sz_fn.target, "`preds` and `target` must be of the same shape", {}), + # check input tensors are not empty + (_irs_empty.preds, _irs_empty.target, "`preds` and `target` must be non-empty and non-scalar tensors", {}), + # check on input dtypes + (_irs.preds.bool(), _irs.target, "`preds` must be a tensor of floats", {}), + ], +) + +_errors_test_functional_metric_parameters_k = dict( + argnames="preds,target,message,metric_args", + argvalues=[ + (_irs.preds, _irs.target, "`k` has to be a positive integer or None", dict(k=-10)), + (_irs.preds, _irs.target, "`k` has to be a positive integer or None", dict(k=4.0)), + ], +) + +_errors_test_class_metric_parameters_no_pos_target = dict( + argnames="indexes,preds,target,message,metric_args", + argvalues=[ + # check when error when there are no positive targets + ( + _irs_no_tgt.indexes, + _irs_no_tgt.preds, + _irs_no_tgt.target, + "`compute` method was provided with a query with no positive target.", + dict(empty_target_action="error"), + ), + ], +) + +_errors_test_class_metric_parameters_no_neg_target = dict( + argnames="indexes,preds,target,message,metric_args", + argvalues=[ + # check when error when there are no negative targets + ( + _irs_all.indexes, + _irs_all.preds, + _irs_all.target, + "`compute` method was provided with a query with no negative target.", + dict(empty_target_action="error"), + ), + ], +) + +_errors_test_class_metric_parameters_with_nonbinary = dict( + argnames="indexes,preds,target,message,metric_args", + argvalues=[ + (None, _irs.preds, _irs.target, "`indexes` cannot be None", dict(empty_target_action="error")), + # check when input arguments are invalid + ( + _irs.indexes, + _irs.preds, + _irs.target, + "`empty_target_action` received a wrong value `casual_argument`.", + dict(empty_target_action="casual_argument"), + ), + # check input shapes are consistent + ( + _irs_mis_sz.indexes, + _irs_mis_sz.preds, + _irs_mis_sz.target, + "`indexes`, `preds` and `target` must be of the same shape", + dict(empty_target_action="skip"), + ), + # check input tensors are not empty + ( + _irs_empty.indexes, + _irs_empty.preds, + _irs_empty.target, + "`indexes`, `preds` and `target` must be non-empty and non-scalar tensors", + dict(empty_target_action="skip"), + ), + # check on input dtypes + ( + _irs.indexes.bool(), + _irs.preds, + _irs.target, + "`indexes` must be a tensor of long integers", + dict(empty_target_action="skip"), + ), + ( + _irs.indexes, + 
_irs.preds.bool(), + _irs.target, + "`preds` must be a tensor of floats", + dict(empty_target_action="skip"), + ), + ], +) + +_errors_test_class_metric_parameters_default = dict( + argnames="indexes,preds,target,message,metric_args", + argvalues=[ + (None, _irs.preds, _irs.target, "`indexes` cannot be None", dict(empty_target_action="error")), + # check when input arguments are invalid + ( + _irs.indexes, + _irs.preds, + _irs.target, + "`empty_target_action` received a wrong value `casual_argument`.", + dict(empty_target_action="casual_argument"), + ), + # check input shapes are consistent + ( + _irs_mis_sz.indexes, + _irs_mis_sz.preds, + _irs_mis_sz.target, + "`indexes`, `preds` and `target` must be of the same shape", + dict(empty_target_action="skip"), + ), + # check input tensors are not empty + ( + _irs_empty.indexes, + _irs_empty.preds, + _irs_empty.target, + "`indexes`, `preds` and `target` must be non-empty and non-scalar tensors", + dict(empty_target_action="skip"), + ), + # check on input dtypes + ( + _irs.indexes.bool(), + _irs.preds, + _irs.target, + "`indexes` must be a tensor of long integers", + dict(empty_target_action="skip"), + ), + ( + _irs.indexes, + _irs.preds.bool(), + _irs.target, + "`preds` must be a tensor of floats", + dict(empty_target_action="skip"), + ), + ], +) + +_errors_test_class_metric_parameters_k = dict( + argnames="indexes,preds,target,message,metric_args", + argvalues=[ + (_irs.index, _irs.preds, _irs.target, "`k` has to be a positive integer or None", dict(k=-10)), + ], +) + +_default_metric_class_input_arguments = dict( + argnames="indexes,preds,target", + argvalues=[ + (_irs.indexes, _irs.preds, _irs.target), + (_irs_extra.indexes, _irs_extra.preds, _irs_extra.target), + (_irs_no_tgt.indexes, _irs_no_tgt.preds, _irs_no_tgt.target), + ], +) + +_default_metric_class_input_arguments_with_non_binary_target = dict( + argnames="indexes,preds,target", + argvalues=[ + (_irs.indexes, _irs.preds, _irs.target), + (_irs_extra.indexes, _irs_extra.preds, _irs_extra.target), + (_irs_no_tgt.indexes, _irs_no_tgt.preds, _irs_no_tgt.target), + (_irs_int_tgt.indexes, _irs_int_tgt.preds, _irs_int_tgt.target), + (_irs_float_tgt.indexes, _irs_float_tgt.preds, _irs_float_tgt.target), + ], +) + +_default_metric_functional_input_arguments = dict( + argnames="preds,target", + argvalues=[ + (_irs.preds, _irs.target), + (_irs_extra.preds, _irs_extra.target), + (_irs_no_tgt.preds, _irs_no_tgt.target), + ], +) + +_default_metric_functional_input_arguments_with_non_binary_target = dict( + argnames="preds,target", + argvalues=[ + (_irs.preds, _irs.target), + (_irs_extra.preds, _irs_extra.target), + (_irs_no_tgt.preds, _irs_no_tgt.target), + (_irs_int_tgt.preds, _irs_int_tgt.target), + (_irs_float_tgt.preds, _irs_float_tgt.target), + ], +) + + +def _errors_test_class_metric( + indexes: Tensor, + preds: Tensor, + target: Tensor, + metric_class: Metric, + message: str = "", + metric_args: dict = None, + exception_type: Type[Exception] = ValueError, + kwargs_update: dict = None, +): + """Utility function doing checks about types, parameters and errors. 
+ + Args: + indexes: torch tensor with indexes + preds: torch tensor with predictions + target: torch tensor with targets + metric_class: lightning metric class that should be tested + message: message that exception should return + metric_args: arguments for class initialization + exception_type: callable function that is used for comparison + kwargs_update: Additional keyword arguments that will be passed with indexes, preds and + target when running update on the metric. + """ + metric_args = metric_args or {} + kwargs_update = kwargs_update or {} + with pytest.raises(exception_type, match=message): + metric = metric_class(**metric_args) + metric(preds, target, indexes=indexes, **kwargs_update) + + +def _errors_test_functional_metric( + preds: Tensor, + target: Tensor, + metric_functional: Metric, + message: str = "", + exception_type: Type[Exception] = ValueError, + kwargs_update: dict = None, +): + """Utility function doing checks about types, parameters and errors. + + Args: + preds: torch tensor with predictions + target: torch tensor with targets + metric_functional: lightning functional metric that should be tested + message: message that exception should return + exception_type: callable function that is used for comparison + kwargs_update: Additional keyword arguments that will be passed with indexes, preds and + target when running update on the metric. + """ + kwargs_update = kwargs_update or {} + with pytest.raises(exception_type, match=message): + metric_functional(preds, target, **kwargs_update) + + +class RetrievalMetricTester(MetricTester): + def run_class_metric_test( + self, + ddp: bool, + indexes: Tensor, + preds: Tensor, + target: Tensor, + metric_class: Metric, + sk_metric: Callable, + dist_sync_on_step: bool, + metric_args: dict, + reverse: bool = False, + ): + _sk_metric_adapted = partial(_compute_sklearn_metric, metric=sk_metric, reverse=reverse, **metric_args) + + super().run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=metric_class, + sk_metric=_sk_metric_adapted, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + fragment_kwargs=True, + indexes=indexes, # every additional argument will be passed to metric_class and _sk_metric_adapted + ) + + def run_functional_metric_test( + self, + preds: Tensor, + target: Tensor, + metric_functional: Callable, + sk_metric: Callable, + metric_args: dict, + reverse: bool = False, + **kwargs, + ): + _sk_metric_adapted = partial(_compute_sklearn_metric, metric=sk_metric, reverse=reverse, **metric_args) + + super().run_functional_metric_test( + preds=preds, + target=target, + metric_functional=metric_functional, + sk_metric=_sk_metric_adapted, + metric_args=metric_args, + fragment_kwargs=True, + **kwargs, + ) + + def run_precision_test_cpu( + self, + indexes: Tensor, + preds: Tensor, + target: Tensor, + metric_module: Metric, + metric_functional: Callable, + ): + def metric_functional_ignore_indexes(preds, target, indexes): + return metric_functional(preds, target) + + super().run_precision_test_cpu( + preds=preds, + target=target, + metric_module=metric_module, + metric_functional=metric_functional_ignore_indexes, + metric_args={"empty_target_action": "neg"}, + indexes=indexes, # every additional argument will be passed to RetrievalMAP and _sk_metric_adapted + ) + + def run_precision_test_gpu( + self, + indexes: Tensor, + preds: Tensor, + target: Tensor, + metric_module: Metric, + metric_functional: Callable, + ): + if not B.cuda.is_available(): + pytest.skip() + + def 
metric_functional_ignore_indexes(preds, target, indexes): + return metric_functional(preds, target) + + super().run_precision_test_gpu( + preds=preds, + target=target, + metric_module=metric_module, + metric_functional=metric_functional_ignore_indexes, + metric_args={"empty_target_action": "neg"}, + indexes=indexes, # every additional argument will be passed to RetrievalMAP and _sk_metric_adapted + ) + + @staticmethod + def run_metric_class_arguments_test( + indexes: Tensor, + preds: Tensor, + target: Tensor, + metric_class: Metric, + message: str = "", + metric_args: dict = None, + exception_type: Type[Exception] = ValueError, + kwargs_update: dict = None, + ): + _errors_test_class_metric( + indexes=indexes, + preds=preds, + target=target, + metric_class=metric_class, + message=message, + metric_args=metric_args, + exception_type=exception_type, + **kwargs_update, + ) + + @staticmethod + def run_functional_metric_arguments_test( + preds: Tensor, + target: Tensor, + metric_functional: Callable, + message: str = "", + exception_type: Type[Exception] = ValueError, + kwargs_update: dict = None, + ): + _errors_test_functional_metric( + preds=preds, + target=target, + metric_functional=metric_functional, + message=message, + exception_type=exception_type, + kwargs_update=kwargs_update, + ) diff --git a/RE/paddlemetric/src/tests/retrieval/inputs.py b/RE/paddlemetric/src/tests/retrieval/inputs.py new file mode 100644 index 00000000..d1e40b81 --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/inputs.py @@ -0,0 +1,82 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
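+# --- Editorial sketch (not part of the original suite) -----------------------
+# The retrieval helpers above evaluate a metric per query: samples are grouped
+# by their `indexes` value, the sklearn metric is computed on each group, and
+# the per-group scores are averaged (mirroring get_group_indexes followed by
+# _compute_sklearn_metric). A hypothetical numpy-only illustration:
+def _example_per_query_average():
+    import numpy as np
+    from sklearn.metrics import average_precision_score
+    indexes = np.array([0, 0, 0, 1, 1, 1, 1])
+    preds = np.array([0.9, 0.2, 0.4, 0.1, 0.8, 0.7, 0.3])
+    target = np.array([1, 0, 0, 0, 1, 1, 0])
+    scores = [
+        average_precision_score(target[indexes == q], preds[indexes == q])
+        for q in np.unique(indexes)    # one score per query, then the mean
+    ]
+    return float(np.mean(scores))
+# -----------------------------------------------------------------------------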
+from collections import namedtuple + +import paddleext.torchapi as B + +from tests.helpers.testers import BATCH_SIZE, EXTRA_DIM, NUM_BATCHES + +Input = namedtuple("InputMultiple", ["indexes", "preds", "target"]) + +# correct +_input_retrieval_scores = Input( + indexes=B.randint(high=10, size=(NUM_BATCHES, BATCH_SIZE)), + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE)), +) + +_input_retrieval_scores_extra = Input( + indexes=B.randint(high=10, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM)), + preds=B.rand(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM)), +) + +_input_retrieval_scores_int_target = Input( + indexes=B.randint(high=10, size=(NUM_BATCHES, 2 * BATCH_SIZE)), + preds=B.rand(NUM_BATCHES, 2 * BATCH_SIZE), + target=B.randint(low=-1, high=4, size=(NUM_BATCHES, 2 * BATCH_SIZE)), +) + +_input_retrieval_scores_float_target = Input( + indexes=B.randint(high=10, size=(NUM_BATCHES, 2 * BATCH_SIZE)), + preds=B.rand(NUM_BATCHES, 2 * BATCH_SIZE), + target=B.rand(NUM_BATCHES, 2 * BATCH_SIZE), +) + +# with errors +_input_retrieval_scores_no_target = Input( + indexes=B.randint(high=10, size=(NUM_BATCHES, BATCH_SIZE)), + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.randint(high=1, size=(NUM_BATCHES, BATCH_SIZE)), +) + +_input_retrieval_scores_all_target = Input( + indexes=B.randint(high=10, size=(NUM_BATCHES, BATCH_SIZE)), + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.randint(low=1, high=2, size=(NUM_BATCHES, BATCH_SIZE)), +) + +_input_retrieval_scores_empty = Input( + indexes=B.randint(high=10, size=[0]), + preds=B.rand(0), + target=B.randint(high=2, size=[0]), +) + +_input_retrieval_scores_mismatching_sizes = Input( + indexes=B.randint(high=10, size=(NUM_BATCHES, BATCH_SIZE - 2)), + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE)), +) + +_input_retrieval_scores_mismatching_sizes_func = Input( + indexes=B.randint(high=10, size=(NUM_BATCHES, BATCH_SIZE)), + preds=B.rand(NUM_BATCHES, BATCH_SIZE - 2), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE)), +) + +_input_retrieval_scores_wrong_targets = Input( + indexes=B.randint(high=10, size=(NUM_BATCHES, BATCH_SIZE)), + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.randint(low=-(2 ** 31), high=2 ** 31, size=(NUM_BATCHES, BATCH_SIZE)), +) diff --git a/RE/paddlemetric/src/tests/retrieval/test_fallout.py b/RE/paddlemetric/src/tests/retrieval/test_fallout.py new file mode 100644 index 00000000..e69ddd59 --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/test_fallout.py @@ -0,0 +1,152 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
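+# --- Editorial note (assumption about the backend shim's randint semantics) ---
+# The "no target" and "all target" fixtures above appear to rely on a half-open
+# randint range: randint(high=1) can only draw 0 (a query with no positive
+# targets) and randint(low=1, high=2) can only draw 1 (no negative targets);
+# these feed the empty_target_action error tests. A hypothetical numpy sketch:
+def _example_degenerate_targets():
+    import numpy as np
+    no_pos = np.random.randint(0, 1, size=8)    # high is exclusive -> always 0
+    all_pos = np.random.randint(1, 2, size=8)   # always 1
+    return int(no_pos.sum()) == 0 and int(all_pos.sum()) == 8
+# -----------------------------------------------------------------------------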
+import numpy as np +import pytest +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.retrieval.helpers import ( + RetrievalMetricTester, + _concat_tests, + _default_metric_class_input_arguments, + _default_metric_functional_input_arguments, + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_k, + _errors_test_class_metric_parameters_no_neg_target, + _errors_test_functional_metric_parameters_default, + _errors_test_functional_metric_parameters_k, +) +from paddlemetrics.functional.retrieval.fall_out import retrieval_fall_out +from paddlemetrics.retrieval.retrieval_fallout import RetrievalFallOut + +seed_all(42) + + +def _fallout_at_k(target: np.ndarray, preds: np.ndarray, k: int = None): + """Didn't find a reliable implementation of Fall-out in Information Retrieval, so, reimplementing here. + + See Wikipedia for `Fall-out`_ for more information about the metric definition. + """ + assert target.shape == preds.shape + assert len(target.shape) == 1 # works only with single dimension inputs + + k = len(preds) if k is None else k + + target = 1 - target + if target.sum(): + order_indexes = np.argsort(preds, axis=0)[::-1] + relevant = np.sum(target[order_indexes][:k]) + return relevant * 1.0 / target.sum() + return np.NaN + + +class TestFallOut(RetrievalMetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("empty_target_action", ["skip", "neg", "pos"]) + @pytest.mark.parametrize("k", [None, 1, 4, 10]) + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_class_metric( + self, + ddp: bool, + indexes: Tensor, + preds: Tensor, + target: Tensor, + dist_sync_on_step: bool, + empty_target_action: str, + k: int, + ): + metric_args = {"empty_target_action": empty_target_action, "k": k} + + self.run_class_metric_test( + ddp=ddp, + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalFallOut, + sk_metric=_fallout_at_k, + dist_sync_on_step=dist_sync_on_step, + reverse=True, + metric_args=metric_args, + ) + + @pytest.mark.parametrize(**_default_metric_functional_input_arguments) + @pytest.mark.parametrize("k", [None, 1, 4, 10]) + def test_functional_metric(self, preds: Tensor, target: Tensor, k: int): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=retrieval_fall_out, + sk_metric=_fallout_at_k, + reverse=True, + metric_args={}, + k=k, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_cpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_cpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalFallOut, + metric_functional=retrieval_fall_out, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_gpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_gpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalFallOut, + metric_functional=retrieval_fall_out, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_neg_target, + _errors_test_class_metric_parameters_k, + ) + ) + def test_arguments_class_metric( + self, indexes: Tensor, preds: Tensor, target: Tensor, message: str, metric_args: dict + ): + self.run_metric_class_arguments_test( + indexes=indexes, + preds=preds, + 
target=target, + metric_class=RetrievalFallOut, + message=message, + metric_args=metric_args, + exception_type=ValueError, + kwargs_update={}, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_functional_metric_parameters_default, + _errors_test_functional_metric_parameters_k, + ) + ) + def test_arguments_functional_metric(self, preds: Tensor, target: Tensor, message: str, metric_args: dict): + self.run_functional_metric_arguments_test( + preds=preds, + target=target, + metric_functional=retrieval_fall_out, + message=message, + exception_type=ValueError, + kwargs_update=metric_args, + ) diff --git a/RE/paddlemetric/src/tests/retrieval/test_hit_rate.py b/RE/paddlemetric/src/tests/retrieval/test_hit_rate.py new file mode 100644 index 00000000..a9d90838 --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/test_hit_rate.py @@ -0,0 +1,147 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pytest +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.retrieval.helpers import ( + RetrievalMetricTester, + _concat_tests, + _default_metric_class_input_arguments, + _default_metric_functional_input_arguments, + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_k, + _errors_test_class_metric_parameters_no_pos_target, + _errors_test_functional_metric_parameters_default, + _errors_test_functional_metric_parameters_k, +) +from paddlemetrics.functional.retrieval.hit_rate import retrieval_hit_rate +from paddlemetrics.retrieval.retrieval_hit_rate import RetrievalHitRate + +seed_all(42) + + +def _hit_rate_at_k(target: np.ndarray, preds: np.ndarray, k: int = None): + """Didn't find a reliable implementation of Hit Rate in Information Retrieval, so, reimplementing here.""" + assert target.shape == preds.shape + assert len(target.shape) == 1 # works only with single dimension inputs + + if k is None: + k = len(preds) + + if target.sum() > 0: + order_indexes = np.argsort(preds, axis=0)[::-1] + relevant = np.sum(target[order_indexes][:k]) + return float(relevant > 0.0) + return np.NaN + + +class TestHitRate(RetrievalMetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("empty_target_action", ["skip", "neg", "pos"]) + @pytest.mark.parametrize("k", [None, 1, 4, 10]) + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_class_metric( + self, + ddp: bool, + indexes: Tensor, + preds: Tensor, + target: Tensor, + dist_sync_on_step: bool, + empty_target_action: str, + k: int, + ): + metric_args = {"empty_target_action": empty_target_action, "k": k} + + self.run_class_metric_test( + ddp=ddp, + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalHitRate, + sk_metric=_hit_rate_at_k, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + ) + + @pytest.mark.parametrize(**_default_metric_functional_input_arguments) + 
@pytest.mark.parametrize("k", [None, 1, 4, 10]) + def test_functional_metric(self, preds: Tensor, target: Tensor, k: int): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=retrieval_hit_rate, + sk_metric=_hit_rate_at_k, + metric_args={}, + k=k, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_cpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_cpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalHitRate, + metric_functional=retrieval_hit_rate, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_gpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_gpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalHitRate, + metric_functional=retrieval_hit_rate, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_pos_target, + _errors_test_class_metric_parameters_k, + ) + ) + def test_arguments_class_metric( + self, indexes: Tensor, preds: Tensor, target: Tensor, message: str, metric_args: dict + ): + self.run_metric_class_arguments_test( + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalHitRate, + message=message, + metric_args=metric_args, + exception_type=ValueError, + kwargs_update={}, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_functional_metric_parameters_default, + _errors_test_functional_metric_parameters_k, + ) + ) + def test_arguments_functional_metric(self, preds: Tensor, target: Tensor, message: str, metric_args: dict): + self.run_functional_metric_arguments_test( + preds=preds, + target=target, + metric_functional=retrieval_hit_rate, + message=message, + exception_type=ValueError, + kwargs_update=metric_args, + ) diff --git a/RE/paddlemetric/src/tests/retrieval/test_map.py b/RE/paddlemetric/src/tests/retrieval/test_map.py new file mode 100644 index 00000000..bef75b55 --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/test_map.py @@ -0,0 +1,120 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import pytest +from sklearn.metrics import average_precision_score as sk_average_precision_score +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.retrieval.helpers import ( + RetrievalMetricTester, + _concat_tests, + _default_metric_class_input_arguments, + _default_metric_functional_input_arguments, + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_pos_target, + _errors_test_functional_metric_parameters_default, +) +from paddlemetrics.functional.retrieval.average_precision import retrieval_average_precision +from paddlemetrics.retrieval.mean_average_precision import RetrievalMAP + +seed_all(42) + + +class TestMAP(RetrievalMetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("empty_target_action", ["skip", "neg", "pos"]) + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_class_metric( + self, + ddp: bool, + indexes: Tensor, + preds: Tensor, + target: Tensor, + dist_sync_on_step: bool, + empty_target_action: str, + ): + metric_args = {"empty_target_action": empty_target_action} + + self.run_class_metric_test( + ddp=ddp, + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalMAP, + sk_metric=sk_average_precision_score, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + ) + + @pytest.mark.parametrize(**_default_metric_functional_input_arguments) + def test_functional_metric(self, preds: Tensor, target: Tensor): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=retrieval_average_precision, + sk_metric=sk_average_precision_score, + metric_args={}, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_cpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_cpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalMAP, + metric_functional=retrieval_average_precision, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_gpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_gpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalMAP, + metric_functional=retrieval_average_precision, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_pos_target, + ) + ) + def test_arguments_class_metric( + self, indexes: Tensor, preds: Tensor, target: Tensor, message: str, metric_args: dict + ): + self.run_metric_class_arguments_test( + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalMAP, + message=message, + metric_args=metric_args, + exception_type=ValueError, + kwargs_update={}, + ) + + @pytest.mark.parametrize(**_errors_test_functional_metric_parameters_default) + def test_arguments_functional_metric(self, preds: Tensor, target: Tensor, message: str, metric_args: dict): + self.run_functional_metric_arguments_test( + preds=preds, + target=target, + metric_functional=retrieval_average_precision, + message=message, + exception_type=ValueError, + kwargs_update=metric_args, + ) diff --git a/RE/paddlemetric/src/tests/retrieval/test_mrr.py b/RE/paddlemetric/src/tests/retrieval/test_mrr.py new file mode 100644 index 00000000..d5a80af9 --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/test_mrr.py @@ -0,0 +1,142 @@ +# Copyright The 
PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pytest +from sklearn.metrics import label_ranking_average_precision_score +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.retrieval.helpers import ( + RetrievalMetricTester, + _concat_tests, + _default_metric_class_input_arguments, + _default_metric_functional_input_arguments, + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_pos_target, + _errors_test_functional_metric_parameters_default, +) +from paddlemetrics.functional.retrieval.reciprocal_rank import retrieval_reciprocal_rank +from paddlemetrics.retrieval.mean_reciprocal_rank import RetrievalMRR + +seed_all(42) + + +def _reciprocal_rank(target: np.ndarray, preds: np.ndarray): + """Adaptation of `sklearn.metrics.label_ranking_average_precision_score`. + + Since the original sklearn metric works as RR only when the number of positive targets is exactly 1, here we remove + every positive target that is not the most important. Remember that in RR only the positive target with the highest + score is considered. + """ + assert target.shape == preds.shape + assert len(target.shape) == 1 # works only with single dimension inputs + + # going to remove T targets that are not ranked as highest + indexes = preds[target.astype(bool)] + if len(indexes) > 0: + target[preds != indexes.max(-1, keepdims=True)[0]] = 0 # ensure that only 1 positive label is present + + if target.sum() > 0: + # sklearn `label_ranking_average_precision_score` requires at most 2 dims + return label_ranking_average_precision_score(np.expand_dims(target, axis=0), np.expand_dims(preds, axis=0)) + return 0.0 + + +class TestMRR(RetrievalMetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("empty_target_action", ["skip", "neg", "pos"]) + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_class_metric( + self, + ddp: bool, + indexes: Tensor, + preds: Tensor, + target: Tensor, + dist_sync_on_step: bool, + empty_target_action: str, + ): + metric_args = {"empty_target_action": empty_target_action} + + self.run_class_metric_test( + ddp=ddp, + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalMRR, + sk_metric=_reciprocal_rank, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + ) + + @pytest.mark.parametrize(**_default_metric_functional_input_arguments) + def test_functional_metric(self, preds: Tensor, target: Tensor): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=retrieval_reciprocal_rank, + sk_metric=_reciprocal_rank, + metric_args={}, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_cpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_cpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalMRR, + 
metric_functional=retrieval_reciprocal_rank, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_gpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_gpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalMRR, + metric_functional=retrieval_reciprocal_rank, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_pos_target, + ) + ) + def test_arguments_class_metric( + self, indexes: Tensor, preds: Tensor, target: Tensor, message: str, metric_args: dict + ): + self.run_metric_class_arguments_test( + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalMRR, + message=message, + metric_args=metric_args, + exception_type=ValueError, + kwargs_update={}, + ) + + @pytest.mark.parametrize(**_errors_test_functional_metric_parameters_default) + def test_arguments_functional_metric(self, preds: Tensor, target: Tensor, message: str, metric_args: dict): + self.run_functional_metric_arguments_test( + preds=preds, + target=target, + metric_functional=retrieval_reciprocal_rank, + message=message, + exception_type=ValueError, + kwargs_update=metric_args, + ) diff --git a/RE/paddlemetric/src/tests/retrieval/test_ndcg.py b/RE/paddlemetric/src/tests/retrieval/test_ndcg.py new file mode 100644 index 00000000..4fa099f1 --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/test_ndcg.py @@ -0,0 +1,151 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import numpy as np +import pytest +from sklearn.metrics import ndcg_score +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.retrieval.helpers import ( + RetrievalMetricTester, + _concat_tests, + _default_metric_class_input_arguments_with_non_binary_target, + _default_metric_functional_input_arguments_with_non_binary_target, + _errors_test_class_metric_parameters_k, + _errors_test_class_metric_parameters_with_nonbinary, + _errors_test_functional_metric_parameters_k, + _errors_test_functional_metric_parameters_with_nonbinary, +) +from paddlemetrics.functional.retrieval.ndcg import retrieval_normalized_dcg +from paddlemetrics.retrieval.retrieval_ndcg import RetrievalNormalizedDCG + +seed_all(42) + + +def _ndcg_at_k(target: np.ndarray, preds: np.ndarray, k: int = None): + """Adapting `from sklearn.metrics.ndcg_score`.""" + assert target.shape == preds.shape + assert len(target.shape) == 1 # works only with single dimension inputs + + if target.shape[0] < 2: # ranking is equal to ideal ranking with a single document + return np.array(1.0) + + preds = np.expand_dims(preds, axis=0) + target = np.expand_dims(target, axis=0) + + return ndcg_score(target, preds, k=k) + + +class TestNDCG(RetrievalMetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("empty_target_action", ["skip", "neg", "pos"]) + @pytest.mark.parametrize("k", [None, 1, 4, 10]) + @pytest.mark.parametrize(**_default_metric_class_input_arguments_with_non_binary_target) + def test_class_metric( + self, + ddp: bool, + indexes: Tensor, + preds: Tensor, + target: Tensor, + dist_sync_on_step: bool, + empty_target_action: str, + k: int, + ): + metric_args = {"empty_target_action": empty_target_action, "k": k} + + self.run_class_metric_test( + ddp=ddp, + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalNormalizedDCG, + sk_metric=_ndcg_at_k, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + ) + + @pytest.mark.parametrize(**_default_metric_functional_input_arguments_with_non_binary_target) + @pytest.mark.parametrize("k", [None, 1, 4, 10]) + def test_functional_metric(self, preds: Tensor, target: Tensor, k: int): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=retrieval_normalized_dcg, + sk_metric=_ndcg_at_k, + metric_args={}, + k=k, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments_with_non_binary_target) + def test_precision_cpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_cpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalNormalizedDCG, + metric_functional=retrieval_normalized_dcg, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments_with_non_binary_target) + def test_precision_gpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_gpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalNormalizedDCG, + metric_functional=retrieval_normalized_dcg, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_class_metric_parameters_with_nonbinary, + _errors_test_class_metric_parameters_k, + ) + ) + def test_arguments_class_metric( + self, indexes: Tensor, preds: Tensor, target: Tensor, message: str, metric_args: dict + ): + if target.is_floating_point(): + pytest.skip("NDCG metric works with float target input") + + self.run_metric_class_arguments_test( + 
indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalNormalizedDCG, + message=message, + metric_args=metric_args, + exception_type=ValueError, + kwargs_update={}, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_functional_metric_parameters_with_nonbinary, + _errors_test_functional_metric_parameters_k, + ) + ) + def test_arguments_functional_metric(self, preds: Tensor, target: Tensor, message: str, metric_args: dict): + if target.is_floating_point(): + pytest.skip("NDCG metric works with float target input") + + self.run_functional_metric_arguments_test( + preds=preds, + target=target, + metric_functional=retrieval_normalized_dcg, + message=message, + exception_type=ValueError, + kwargs_update=metric_args, + ) diff --git a/RE/paddlemetric/src/tests/retrieval/test_precision.py b/RE/paddlemetric/src/tests/retrieval/test_precision.py new file mode 100644 index 00000000..260e0242 --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/test_precision.py @@ -0,0 +1,151 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pytest +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.retrieval.helpers import ( + RetrievalMetricTester, + _concat_tests, + _default_metric_class_input_arguments, + _default_metric_functional_input_arguments, + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_k, + _errors_test_class_metric_parameters_no_pos_target, + _errors_test_functional_metric_parameters_default, + _errors_test_functional_metric_parameters_k, +) +from paddlemetrics.functional.retrieval.precision import retrieval_precision +from paddlemetrics.retrieval.retrieval_precision import RetrievalPrecision + +seed_all(42) + + +def _precision_at_k(target: np.ndarray, preds: np.ndarray, k: int = None): + """Didn't find a reliable implementation of Precision in Information Retrieval, so, reimplementing here. + + A good explanation can be found + `here _`. 
+ """ + assert target.shape == preds.shape + assert len(target.shape) == 1 # works only with single dimension inputs + + if k is None: + k = len(preds) + + if target.sum() > 0: + order_indexes = np.argsort(preds, axis=0)[::-1] + relevant = np.sum(target[order_indexes][:k]) + return relevant * 1.0 / k + return np.NaN + + +class TestPrecision(RetrievalMetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("empty_target_action", ["skip", "neg", "pos"]) + @pytest.mark.parametrize("k", [None, 1, 4, 10]) + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_class_metric( + self, + ddp: bool, + indexes: Tensor, + preds: Tensor, + target: Tensor, + dist_sync_on_step: bool, + empty_target_action: str, + k: int, + ): + metric_args = {"empty_target_action": empty_target_action, "k": k} + + self.run_class_metric_test( + ddp=ddp, + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalPrecision, + sk_metric=_precision_at_k, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + ) + + @pytest.mark.parametrize(**_default_metric_functional_input_arguments) + @pytest.mark.parametrize("k", [None, 1, 4, 10]) + def test_functional_metric(self, preds: Tensor, target: Tensor, k: int): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=retrieval_precision, + sk_metric=_precision_at_k, + metric_args={}, + k=k, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_cpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_cpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalPrecision, + metric_functional=retrieval_precision, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_gpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_gpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalPrecision, + metric_functional=retrieval_precision, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_pos_target, + _errors_test_class_metric_parameters_k, + ) + ) + def test_arguments_class_metric( + self, indexes: Tensor, preds: Tensor, target: Tensor, message: str, metric_args: dict + ): + self.run_metric_class_arguments_test( + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalPrecision, + message=message, + metric_args=metric_args, + exception_type=ValueError, + kwargs_update={}, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_functional_metric_parameters_default, + _errors_test_functional_metric_parameters_k, + ) + ) + def test_arguments_functional_metric(self, preds: Tensor, target: Tensor, message: str, metric_args: dict): + self.run_functional_metric_arguments_test( + preds=preds, + target=target, + metric_functional=retrieval_precision, + message=message, + exception_type=ValueError, + kwargs_update=metric_args, + ) diff --git a/RE/paddlemetric/src/tests/retrieval/test_r_precision.py b/RE/paddlemetric/src/tests/retrieval/test_r_precision.py new file mode 100644 index 00000000..e9787482 --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/test_r_precision.py @@ -0,0 +1,136 @@ +# Copyright The PyTorch Lightning team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pytest +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.retrieval.helpers import ( + RetrievalMetricTester, + _concat_tests, + _default_metric_class_input_arguments, + _default_metric_functional_input_arguments, + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_pos_target, + _errors_test_functional_metric_parameters_default, +) +from paddlemetrics.functional.retrieval.r_precision import retrieval_r_precision +from paddlemetrics.retrieval.retrieval_r_precision import RetrievalRPrecision + +seed_all(42) + + +def _r_precision(target: np.ndarray, preds: np.ndarray): + """Didn't find a reliable implementation of R-Precision in Information Retrieval, so, reimplementing here. + + A good explanation can be found + `here _`. + """ + assert target.shape == preds.shape + assert len(target.shape) == 1 # works only with single dimension inputs + + if target.sum() > 0: + order_indexes = np.argsort(preds, axis=0)[::-1] + relevant = np.sum(target[order_indexes][: target.sum()]) + return relevant * 1.0 / target.sum() + return np.NaN + + +class TestRPrecision(RetrievalMetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("empty_target_action", ["skip", "neg", "pos"]) + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_class_metric( + self, + ddp: bool, + indexes: Tensor, + preds: Tensor, + target: Tensor, + dist_sync_on_step: bool, + empty_target_action: str, + ): + metric_args = {"empty_target_action": empty_target_action} + + self.run_class_metric_test( + ddp=ddp, + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalRPrecision, + sk_metric=_r_precision, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + ) + + @pytest.mark.parametrize(**_default_metric_functional_input_arguments) + def test_functional_metric(self, preds: Tensor, target: Tensor): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=retrieval_r_precision, + sk_metric=_r_precision, + metric_args={}, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_cpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_cpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalRPrecision, + metric_functional=retrieval_r_precision, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_gpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_gpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalRPrecision, + metric_functional=retrieval_r_precision, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_pos_target, + ) + ) + def 
test_arguments_class_metric( + self, indexes: Tensor, preds: Tensor, target: Tensor, message: str, metric_args: dict + ): + self.run_metric_class_arguments_test( + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalRPrecision, + message=message, + metric_args=metric_args, + exception_type=ValueError, + kwargs_update={}, + ) + + @pytest.mark.parametrize(**_errors_test_functional_metric_parameters_default) + def test_arguments_functional_metric(self, preds: Tensor, target: Tensor, message: str, metric_args: dict): + self.run_functional_metric_arguments_test( + preds=preds, + target=target, + metric_functional=retrieval_r_precision, + message=message, + exception_type=ValueError, + kwargs_update=metric_args, + ) diff --git a/RE/paddlemetric/src/tests/retrieval/test_recall.py b/RE/paddlemetric/src/tests/retrieval/test_recall.py new file mode 100644 index 00000000..8f01120b --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/test_recall.py @@ -0,0 +1,150 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pytest +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.retrieval.helpers import ( + RetrievalMetricTester, + _concat_tests, + _default_metric_class_input_arguments, + _default_metric_functional_input_arguments, + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_k, + _errors_test_class_metric_parameters_no_pos_target, + _errors_test_functional_metric_parameters_default, + _errors_test_functional_metric_parameters_k, +) +from paddlemetrics.functional.retrieval.recall import retrieval_recall +from paddlemetrics.retrieval.retrieval_recall import RetrievalRecall + +seed_all(42) + + +def _recall_at_k(target: np.ndarray, preds: np.ndarray, k: int = None): + """Didn't find a reliable implementation of Recall in Information Retrieval, so, reimplementing here. + + See wikipedia for more information about definition. 
+ """ + assert target.shape == preds.shape + assert len(target.shape) == 1 # works only with single dimension inputs + + if k is None: + k = len(preds) + + if target.sum() > 0: + order_indexes = np.argsort(preds, axis=0)[::-1] + relevant = np.sum(target[order_indexes][:k]) + return relevant * 1.0 / target.sum() + return np.NaN + + +class TestRecall(RetrievalMetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("empty_target_action", ["skip", "neg", "pos"]) + @pytest.mark.parametrize("k", [None, 1, 4, 10]) + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_class_metric( + self, + ddp: bool, + indexes: Tensor, + preds: Tensor, + target: Tensor, + dist_sync_on_step: bool, + empty_target_action: str, + k: int, + ): + metric_args = {"empty_target_action": empty_target_action, "k": k} + + self.run_class_metric_test( + ddp=ddp, + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalRecall, + sk_metric=_recall_at_k, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + ) + + @pytest.mark.parametrize(**_default_metric_functional_input_arguments) + @pytest.mark.parametrize("k", [None, 1, 4, 10]) + def test_functional_metric(self, preds: Tensor, target: Tensor, k: int): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=retrieval_recall, + sk_metric=_recall_at_k, + metric_args={}, + k=k, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_cpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_cpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalRecall, + metric_functional=retrieval_recall, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_gpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_gpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalRecall, + metric_functional=retrieval_recall, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_pos_target, + _errors_test_class_metric_parameters_k, + ) + ) + def test_arguments_class_metric( + self, indexes: Tensor, preds: Tensor, target: Tensor, message: str, metric_args: dict + ): + self.run_metric_class_arguments_test( + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalRecall, + message=message, + metric_args=metric_args, + exception_type=ValueError, + kwargs_update={}, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_functional_metric_parameters_default, + _errors_test_functional_metric_parameters_k, + ) + ) + def test_arguments_functional_metric(self, preds: Tensor, target: Tensor, message: str, metric_args: dict): + self.run_functional_metric_arguments_test( + preds=preds, + target=target, + metric_functional=retrieval_recall, + message=message, + exception_type=ValueError, + kwargs_update=metric_args, + ) diff --git a/RE/paddlemetric/src/tests/test_utilities.py b/RE/paddlemetric/src/tests/test_utilities.py new file mode 100644 index 00000000..0f7aacd0 --- /dev/null +++ b/RE/paddlemetric/src/tests/test_utilities.py @@ -0,0 +1,21 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddlemetrics.utilities import rank_zero_debug, rank_zero_info, rank_zero_warn + + +def test_prints(): + rank_zero_debug("DEBUG") + rank_zero_info("INFO") + rank_zero_warn("WARN") diff --git a/RE/paddlemetric/src/tests/text/__init__.py b/RE/paddlemetric/src/tests/text/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/text/helpers.py b/RE/paddlemetric/src/tests/text/helpers.py new file mode 100644 index 00000000..ee896504 --- /dev/null +++ b/RE/paddlemetric/src/tests/text/helpers.py @@ -0,0 +1,479 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pickle +import sys +from enum import Enum, unique +from functools import partial +from typing import Any, Callable, Sequence, Union + +import pytest +import paddleext.torchapi as B +from paddleext.torchapi import Tensor +from B.multiprocessing import set_start_method + +from tests.helpers.testers import MetricTester, _assert_allclose, _assert_requires_grad, _assert_tensor +from paddlemetrics import Metric + +try: + set_start_method("spawn") +except RuntimeError: + pass + + +@unique +class INPUT_ORDER(Enum): + PREDS_FIRST = 1 + TARGETS_FIRST = 2 + + +TEXT_METRIC_INPUT = Union[Sequence[str], Sequence[Sequence[str]], Sequence[Sequence[Sequence[str]]]] +NUM_BATCHES = 2 + + +def _class_test( + rank: int, + worldsize: int, + preds: TEXT_METRIC_INPUT, + targets: TEXT_METRIC_INPUT, + metric_class: Metric, + sk_metric: Callable, + dist_sync_on_step: bool, + metric_args: dict = None, + check_dist_sync_on_step: bool = True, + check_batch: bool = True, + atol: float = 1e-8, + device: str = "cpu", + fragment_kwargs: bool = False, + check_scriptable: bool = True, + input_order: INPUT_ORDER = INPUT_ORDER.PREDS_FIRST, + key: str = None, + **kwargs_update: Any, +): + """Utility function doing the actual comparison between lightning class metric and reference metric. 
+ + Args: + rank: rank of current process + worldsize: number of processes + preds: Sequence of predicted tokens or predicted sentences + targets: Sequence of target tokens or target sentences + metric_class: lightning metric class that should be tested + sk_metric: callable function that is used for comparison + dist_sync_on_step: bool, if true will synchronize metric state across + processes at each ``forward()`` + metric_args: dict with additional arguments used for class initialization + check_dist_sync_on_step: bool, if true will check if the metric is also correctly + calculated per batch per device (and not just at the end) + check_batch: bool, if true will check if the metric is also correctly + calculated across devices for each batch (and not just at the end) + device: determine which device to run on, either 'cuda' or 'cpu' + fragment_kwargs: whether tensors in kwargs should be divided as `preds` and `targets` among processes + input_order: Define the ordering for the preds and targets positional arguments. + key: The key passed onto the `_assert_allclose` to compare the respective metric from the Dict output against + the sk_metric. + kwargs_update: Additional keyword arguments that will be passed with preds and + targets when running update on the metric. + """ + if not metric_args: + metric_args = {} + + # Instanciate lightning metric + metric = metric_class( + compute_on_step=check_dist_sync_on_step or check_batch, dist_sync_on_step=dist_sync_on_step, **metric_args + ) + + # check that the metric is scriptable + if check_scriptable: + B.jit.script(metric) + + # move to device + metric = metric.to(device) + kwargs_update = {k: v.to(device) if isinstance(v, Tensor) else v for k, v in kwargs_update.items()} + + # verify metrics work after being loaded from pickled state + pickled_metric = pickle.dumps(metric) + metric = pickle.loads(pickled_metric) + + for i in range(rank, NUM_BATCHES, worldsize): + batch_kwargs_update = {k: v[i] if isinstance(v, Tensor) else v for k, v in kwargs_update.items()} + + if input_order == INPUT_ORDER.PREDS_FIRST: + batch_result = metric(preds[i], targets[i], **batch_kwargs_update) + elif input_order == INPUT_ORDER.TARGETS_FIRST: + batch_result = metric(targets[i], preds[i], **batch_kwargs_update) + + if metric.dist_sync_on_step and check_dist_sync_on_step and rank == 0: + # Concatenation of Sequence of strings + ddp_preds = type(preds)() + ddp_targets = type(targets)() + for r in range(worldsize): + ddp_preds = ddp_preds + preds[i + r] + ddp_targets = ddp_targets + targets[i + r] + ddp_kwargs_upd = { + k: B.cat([v[i + r] for r in range(worldsize)]).cpu() if isinstance(v, Tensor) else v + for k, v in (kwargs_update if fragment_kwargs else batch_kwargs_update).items() + } + + if input_order == INPUT_ORDER.PREDS_FIRST: + sk_batch_result = sk_metric(ddp_preds, ddp_targets, **ddp_kwargs_upd) + elif input_order == INPUT_ORDER.TARGETS_FIRST: + sk_batch_result = sk_metric(ddp_targets, ddp_preds, **ddp_kwargs_upd) + _assert_allclose(batch_result, sk_batch_result, atol=atol, key=key) + + elif check_batch and not metric.dist_sync_on_step: + batch_kwargs_update = { + k: v.cpu() if isinstance(v, Tensor) else v + for k, v in (batch_kwargs_update if fragment_kwargs else kwargs_update).items() + } + if input_order == INPUT_ORDER.PREDS_FIRST: + sk_batch_result = sk_metric(preds[i], targets[i], **batch_kwargs_update) + elif input_order == INPUT_ORDER.TARGETS_FIRST: + sk_batch_result = sk_metric(targets[i], preds[i], **batch_kwargs_update) + + 
_assert_allclose(batch_result, sk_batch_result, atol=atol, key=key) + + # check that metrics are hashable + assert hash(metric) + + # check on all batches on all ranks + result = metric.compute() + _assert_tensor(result, key=key) + + # Concatenation of Sequence of strings + total_preds = type(preds)() + total_targets = type(targets)() + for i in range(NUM_BATCHES): + total_preds = total_preds + preds[i] + total_targets = total_targets + targets[i] + total_kwargs_update = { + k: B.cat([v[i] for i in range(NUM_BATCHES)]).cpu() if isinstance(v, Tensor) else v + for k, v in kwargs_update.items() + } + if input_order == INPUT_ORDER.PREDS_FIRST: + sk_result = sk_metric(total_preds, total_targets, **total_kwargs_update) + elif input_order == INPUT_ORDER.TARGETS_FIRST: + sk_result = sk_metric(total_targets, total_preds, **total_kwargs_update) + + # assert after aggregation + _assert_allclose(result, sk_result, atol=atol, key=key) + + +def _functional_test( + preds: TEXT_METRIC_INPUT, + targets: TEXT_METRIC_INPUT, + metric_functional: Callable, + sk_metric: Callable, + metric_args: dict = None, + atol: float = 1e-8, + device: str = "cpu", + fragment_kwargs: bool = False, + input_order: INPUT_ORDER = INPUT_ORDER.PREDS_FIRST, + key: str = None, + **kwargs_update, +): + """Utility function doing the actual comparison between lightning functional metric and reference metric. + + Args: + preds: torch tensor with predictions + targets: torch tensor with targets + metric_functional: lightning metric functional that should be tested + sk_metric: callable function that is used for comparison + metric_args: dict with additional arguments used for class initialization + device: determine which device to run on, either 'cuda' or 'cpu' + fragment_kwargs: whether tensors in kwargs should be divided as `preds` and `targets` among processes + input_order: Define the ordering for the preds and targets positional arguments. + key: The key passed onto the `_assert_allclose` to compare the respective metric from the Dict output against + the sk_metric. + kwargs_update: Additional keyword arguments that will be passed with preds and + targets when running update on the metric. + """ + if not metric_args: + metric_args = {} + + metric = partial(metric_functional, **metric_args) + + # Move to device + kwargs_update = {k: v.to(device) if isinstance(v, Tensor) else v for k, v in kwargs_update.items()} + + for i in range(NUM_BATCHES): + extra_kwargs = {k: v[i] if isinstance(v, Tensor) else v for k, v in kwargs_update.items()} + if input_order == INPUT_ORDER.PREDS_FIRST: + lightning_result = metric(preds[i], targets[i], **extra_kwargs) + elif input_order == INPUT_ORDER.TARGETS_FIRST: + lightning_result = metric(targets[i], preds[i], **extra_kwargs) + + extra_kwargs = { + k: v.cpu() if isinstance(v, Tensor) else v + for k, v in (extra_kwargs if fragment_kwargs else kwargs_update).items() + } + if input_order == INPUT_ORDER.PREDS_FIRST: + sk_result = sk_metric(preds[i], targets[i], **extra_kwargs) + elif input_order == INPUT_ORDER.TARGETS_FIRST: + sk_result = sk_metric(targets[i], preds[i], **extra_kwargs) + + # assert its the same + _assert_allclose(lightning_result, sk_result, atol=atol, key=key) + + +def _assert_half_support( + metric_module: Metric, + metric_functional: Callable, + preds: TEXT_METRIC_INPUT, + targets: TEXT_METRIC_INPUT, + device: str = "cpu", + **kwargs_update, +): + """Test if an metric can be used with half precision tensors. 
+ + Args: + metric_module: the metric module to test + metric_functional: the metric functional to test + preds: torch tensor with predictions + targets: torch tensor with targets + device: determine device, either "cpu" or "cuda" + kwargs_update: Additional keyword arguments that will be passed with preds and + targets when running update on the metric. + """ + y_hat = preds[0] + y = targets[0] + kwargs_update = { + k: (v[0].half() if v.is_floating_point() else v[0]).to(device) if isinstance(v, Tensor) else v + for k, v in kwargs_update.items() + } + metric_module = metric_module.to(device) + _assert_tensor(metric_module(y_hat, y, **kwargs_update)) + _assert_tensor(metric_functional(y_hat, y, **kwargs_update)) + + +class TextTester(MetricTester): + """Class used for efficiently run alot of parametrized tests in ddp mode. Makes sure that ddp is only setup + once and that pool of processes are used for all tests. + + All tests for text metrics should subclass from this and implement a new method called `test_metric_name` where the + method `self.run_metric_test` is called inside. + """ + + def run_functional_metric_test( + self, + preds: TEXT_METRIC_INPUT, + targets: TEXT_METRIC_INPUT, + metric_functional: Callable, + sk_metric: Callable, + metric_args: dict = None, + fragment_kwargs: bool = False, + input_order: INPUT_ORDER = INPUT_ORDER.PREDS_FIRST, + key: str = None, + **kwargs_update, + ): + """Main method that should be used for testing functions. Call this inside testing method. + + Args: + preds: torch tensor with predictions + targets: torch tensor with targets + metric_functional: lightning metric class that should be tested + sk_metric: callable function that is used for comparison + metric_args: dict with additional arguments used for class initialization + fragment_kwargs: whether tensors in kwargs should be divided as `preds` and `targets` among processes + input_order: Define the ordering for the preds and targets positional arguments. + key: The key passed onto the `_assert_allclose` to compare the respective metric from the Dict output + against the sk_metric. + kwargs_update: Additional keyword arguments that will be passed with preds and + targets when running update on the metric. + """ + device = "cuda" if (B.cuda.is_available() and B.cuda.device_count() > 0) else "cpu" + + _functional_test( + preds=preds, + targets=targets, + metric_functional=metric_functional, + sk_metric=sk_metric, + metric_args=metric_args, + atol=self.atol, + device=device, + fragment_kwargs=fragment_kwargs, + input_order=input_order, + key=key, + **kwargs_update, + ) + + def run_class_metric_test( + self, + ddp: bool, + preds: TEXT_METRIC_INPUT, + targets: TEXT_METRIC_INPUT, + metric_class: Metric, + sk_metric: Callable, + dist_sync_on_step: bool, + metric_args: dict = None, + check_dist_sync_on_step: bool = True, + check_batch: bool = True, + fragment_kwargs: bool = False, + check_scriptable: bool = True, + input_order: INPUT_ORDER = INPUT_ORDER.PREDS_FIRST, + key: str = None, + **kwargs_update, + ): + """Main method that should be used for testing class. Call this inside testing methods. 
+ + Args: + ddp: bool, if running in ddp mode or not + preds: torch tensor with predictions + targets: torch tensor with targets + metric_class: lightning metric class that should be tested + sk_metric: callable function that is used for comparison + dist_sync_on_step: bool, if true will synchronize metric state across + processes at each ``forward()`` + metric_args: dict with additional arguments used for class initialization + check_dist_sync_on_step: bool, if true will check if the metric is also correctly + calculated per batch per device (and not just at the end) + check_batch: bool, if true will check if the metric is also correctly + calculated across devices for each batch (and not just at the end) + fragment_kwargs: whether tensors in kwargs should be divided as `preds` and `targets` among processes + input_order: Define the ordering for the preds and targets positional arguments. + key: The key passed onto the `_assert_allclose` to compare the respective metric from the Dict output + against the sk_metric. + kwargs_update: Additional keyword arguments that will be passed with preds and + targets when running update on the metric. + """ + if not metric_args: + metric_args = {} + if ddp: + if sys.platform == "win32": + pytest.skip("DDP not supported on windows") + + self.pool.starmap( + partial( + _class_test, + preds=preds, + targets=targets, + metric_class=metric_class, + sk_metric=sk_metric, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + check_dist_sync_on_step=check_dist_sync_on_step, + check_batch=check_batch, + atol=self.atol, + fragment_kwargs=fragment_kwargs, + check_scriptable=check_scriptable, + input_order=input_order, + key=key, + **kwargs_update, + ), + [(rank, self.poolSize) for rank in range(self.poolSize)], + ) + else: + device = "cuda" if (B.cuda.is_available() and B.cuda.device_count() > 0) else "cpu" + + _class_test( + rank=0, + worldsize=1, + preds=preds, + targets=targets, + metric_class=metric_class, + sk_metric=sk_metric, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + check_dist_sync_on_step=check_dist_sync_on_step, + check_batch=check_batch, + atol=self.atol, + device=device, + fragment_kwargs=fragment_kwargs, + check_scriptable=check_scriptable, + input_order=input_order, + key=key, + **kwargs_update, + ) + + @staticmethod + def run_precision_test_cpu( + preds: TEXT_METRIC_INPUT, + targets: TEXT_METRIC_INPUT, + metric_module: Metric, + metric_functional: Callable, + metric_args: dict = None, + **kwargs_update, + ): + """Test if a metric can be used with half precision tensors on cpu + Args: + preds: torch tensor with predictions + targets: torch tensor with targets + metric_module: the metric module to test + metric_functional: the metric functional to test + metric_args: dict with additional arguments used for class initialization + kwargs_update: Additional keyword arguments that will be passed with preds and + targets when running update on the metric. 
+ """ + metric_args = metric_args or {} + _assert_half_support( + metric_module(**metric_args), metric_functional, preds, targets, device="cpu", **kwargs_update + ) + + @staticmethod + def run_precision_test_gpu( + preds: TEXT_METRIC_INPUT, + targets: TEXT_METRIC_INPUT, + metric_module: Metric, + metric_functional: Callable, + metric_args: dict = None, + **kwargs_update, + ): + """Test if a metric can be used with half precision tensors on gpu + Args: + preds: torch tensor with predictions + targets: torch tensor with targets + metric_module: the metric module to test + metric_functional: the metric functional to test + metric_args: dict with additional arguments used for class initialization + kwargs_update: Additional keyword arguments that will be passed with preds and + targets when running update on the metric. + """ + metric_args = metric_args or {} + _assert_half_support( + metric_module(**metric_args), metric_functional, preds, targets, device="cuda", **kwargs_update + ) + + @staticmethod + def run_differentiability_test( + preds: TEXT_METRIC_INPUT, + targets: TEXT_METRIC_INPUT, + metric_module: Metric, + metric_functional: Callable, + metric_args: dict = None, + input_order: INPUT_ORDER = INPUT_ORDER.PREDS_FIRST, + key: str = None, + ): + """Test if a metric is differentiable or not. + + Args: + preds: torch tensor with predictions + targets: torch tensor with targets + metric_module: the metric module to test + metric_args: dict with additional arguments used for class initialization + input_order: Define the ordering for the preds and targets positional arguments. + key: The key passed onto the `_assert_allclose` to compare the respective metric from the Dict output + against the sk_metric. + """ + metric_args = metric_args or {} + # only floating point tensors can require grad + metric = metric_module(**metric_args) + if input_order == INPUT_ORDER.PREDS_FIRST: + out = metric(preds[0], targets[0]) + elif input_order == INPUT_ORDER.TARGETS_FIRST: + out = metric(targets[0], preds[0]) + + # Check if requires_grad matches is_differentiable attribute + _assert_requires_grad(metric, out, key=key) + + if metric.is_differentiable: + # check for numerical correctness + assert B.autograd.gradcheck(partial(metric_functional, **metric_args), (preds[0], targets[0])) diff --git a/RE/paddlemetric/src/tests/text/test_bertscore.py b/RE/paddlemetric/src/tests/text/test_bertscore.py new file mode 100644 index 00000000..68e51568 --- /dev/null +++ b/RE/paddlemetric/src/tests/text/test_bertscore.py @@ -0,0 +1,318 @@ +import os +from typing import Any, Dict, List + +import numpy as np +import pytest +import paddleext.torchapi as B +import paddleext.torchapi as B.distributed as dist +import paddleext.torchapi as B.multiprocessing as mp + +from paddlemetrics.functional import bert_score as metrics_bert_score +from paddlemetrics.text import BERTScore +from paddlemetrics.utilities.imports import _BERTSCORE_AVAILABLE + +if _BERTSCORE_AVAILABLE: + from bert_score import score as original_bert_score + +os.environ["TOKENIZERS_PARALLELISM"] = "1" + +# Examples and expected values taken from: +# https://github.com/Tiiiger/bert_score/blob/master/tests/test_scorer.py +preds = [ + "28-year-old chef found dead in San Francisco mall", + "A 28-year-old chef who recently moved to San Francisco was " + "found dead in the staircase of a local shopping center.", + "The victim's brother said he cannot imagine anyone who would want to harm him,\"Finally, it went uphill again at " + 'him."', +] +refs = [ + "28-Year-Old Chef 
Found Dead at San Francisco Mall", + "A 28-year-old chef who had recently moved to San Francisco was found dead in the stairwell of a local mall this " + "week.", + "But the victim's brother says he can't think of anyone who would want to hurt him, saying, \"Things were finally " + 'going well for him."', +] + + +_METRICS = ["precision", "recall", "f1"] + +MODEL_NAME = "albert-base-v2" + + +def _assert_list(preds: Any, refs: Any, threshold: float = 1e-8): + """Assert two lists are equal.""" + assert np.allclose(preds, refs, atol=threshold, equal_nan=True) + + +def _parse_original_bert_score(score: B.Tensor) -> Dict[str, List[float]]: + """Parse the BERT score returned by the original `bert-score` package.""" + score_dict = {metric: value.tolist() for metric, value in zip(_METRICS, score)} + return score_dict + + +preds_batched = [preds[0:2], preds[2:]] +refs_batched = [refs[0:2], refs[2:]] + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score_fn(preds, refs): + """Tests for functional.""" + original_score = original_bert_score(preds, refs, model_type=MODEL_NAME, num_layers=8, idf=False, batch_size=3) + original_score = _parse_original_bert_score(original_score) + + metrics_score = metrics_bert_score( + preds, refs, model_name_or_path=MODEL_NAME, num_layers=8, idf=False, batch_size=3 + ) + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score_fn_with_idf(preds, refs): + """Tests for functional with IDF rescaling.""" + original_score = original_bert_score(preds, refs, model_type=MODEL_NAME, num_layers=12, idf=True, batch_size=3) + original_score = _parse_original_bert_score(original_score) + + metrics_score = metrics_bert_score( + preds, refs, model_name_or_path=MODEL_NAME, num_layers=12, idf=True, batch_size=3 + ) + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score_fn_all_layers(preds, refs): + """Tests for functional and all layers.""" + original_score = original_bert_score(preds, refs, model_type=MODEL_NAME, all_layers=True, idf=False, batch_size=3) + original_score = _parse_original_bert_score(original_score) + + metrics_score = metrics_bert_score( + preds, refs, model_name_or_path=MODEL_NAME, all_layers=True, idf=False, batch_size=3 + ) + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score_fn_all_layers_with_idf(preds, refs): + """Tests for functional and all layers with IDF rescaling.""" + original_score = original_bert_score(preds, refs, model_type=MODEL_NAME, all_layers=True, idf=True, batch_size=3) + original_score = _parse_original_bert_score(original_score) + + metrics_score = metrics_bert_score( + preds, refs, model_name_or_path=MODEL_NAME, all_layers=True, idf=True, batch_size=3 + ) + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not 
_BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score_fn_all_layers_rescale_with_baseline(preds, refs): + """Tests for functional with baseline rescaling.""" + original_score = original_bert_score( + preds, + refs, + model_type=MODEL_NAME, + lang="en", + num_layers=8, + idf=False, + batch_size=3, + rescale_with_baseline=True, + ) + original_score = _parse_original_bert_score(original_score) + + metrics_score = metrics_bert_score( + preds, + refs, + model_name_or_path=MODEL_NAME, + lang="en", + num_layers=8, + idf=False, + batch_size=3, + rescale_with_baseline=True, + ) + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score_fn_rescale_with_baseline(preds, refs): + """Tests for functional with baseline rescaling with all layers.""" + original_score = original_bert_score( + preds, + refs, + model_type=MODEL_NAME, + lang="en", + all_layers=True, + idf=False, + batch_size=3, + rescale_with_baseline=True, + ) + original_score = _parse_original_bert_score(original_score) + + metrics_score = metrics_bert_score( + preds, + refs, + model_name_or_path=MODEL_NAME, + lang="en", + all_layers=True, + idf=False, + batch_size=3, + rescale_with_baseline=True, + ) + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score(preds, refs): + """Tests for metric.""" + original_score = original_bert_score(preds, refs, model_type=MODEL_NAME, num_layers=8, idf=False, batch_size=3) + original_score = _parse_original_bert_score(original_score) + + Scorer = BERTScore(model_name_or_path=MODEL_NAME, num_layers=8, idf=False, batch_size=3) + Scorer.update(predictions=preds, references=refs) + metrics_score = Scorer.compute() + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score_with_idf(preds, refs): + """Tests for metric with IDF rescaling.""" + original_score = original_bert_score(preds, refs, model_type=MODEL_NAME, num_layers=8, idf=True, batch_size=3) + original_score = _parse_original_bert_score(original_score) + + Scorer = BERTScore(model_name_or_path=MODEL_NAME, num_layers=8, idf=True, batch_size=3) + Scorer.update(predictions=preds, references=refs) + metrics_score = Scorer.compute() + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score_all_layers(preds, refs): + """Tests for metric and all layers.""" + original_score = original_bert_score(preds, refs, model_type=MODEL_NAME, all_layers=True, idf=False, batch_size=3) + original_score = _parse_original_bert_score(original_score) + + Scorer = BERTScore(model_name_or_path=MODEL_NAME, all_layers=True, idf=False, batch_size=3) + Scorer.update(predictions=preds, references=refs) + metrics_score = Scorer.compute() + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, 
refs)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score_all_layers_with_idf(preds, refs): + """Tests for metric and all layers with IDF rescaling.""" + original_score = original_bert_score(preds, refs, model_type=MODEL_NAME, all_layers=True, idf=True, batch_size=3) + original_score = _parse_original_bert_score(original_score) + + Scorer = BERTScore(model_name_or_path=MODEL_NAME, all_layers=True, idf=True, batch_size=3) + Scorer.update(predictions=preds, references=refs) + metrics_score = Scorer.compute() + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds_batched, refs_batched)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_accumulation(preds, refs): + """Tests for metric works with accumulation.""" + original_score = original_bert_score( + sum(preds, []), sum(refs, []), model_type=MODEL_NAME, num_layers=8, idf=False, batch_size=3 + ) + original_score = _parse_original_bert_score(original_score) + + Scorer = BERTScore(model_name_or_path=MODEL_NAME, num_layers=8, idf=False, batch_size=3) + for p, r in zip(preds, refs): + Scorer.update(predictions=p, references=r) + metrics_score = Scorer.compute() + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +def _bert_score_ddp(rank, world_size, preds, refs, original_score): + """Define a DDP process for BERTScore.""" + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "12355" + dist.init_process_group("gloo", rank=rank, world_size=world_size) + Scorer = BERTScore(model_name_or_path=MODEL_NAME, num_layers=8, idf=False, batch_size=3, max_length=128) + Scorer.update(preds, refs) + metrics_score = Scorer.compute() + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + dist.destroy_process_group() + + +def _test_score_ddp_fn(rank, world_size, preds, refs): + """Core functionality for the `test_score_ddp` test.""" + original_score = original_bert_score(preds, refs, model_type=MODEL_NAME, num_layers=8, idf=False, batch_size=3) + original_score = _parse_original_bert_score(original_score) + _bert_score_ddp(rank, world_size, preds, refs, original_score) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not (_BERTSCORE_AVAILABLE and dist.is_available()), reason="test requires bert_score") +def test_score_ddp(preds, refs): + """Tests for metric using DDP.""" + world_size = 2 + mp.spawn(_test_score_ddp_fn, args=(world_size, preds, refs), nprocs=world_size, join=False) diff --git a/RE/paddlemetric/src/tests/text/test_bleu.py b/RE/paddlemetric/src/tests/text/test_bleu.py new file mode 100644 index 00000000..168dc760 --- /dev/null +++ b/RE/paddlemetric/src/tests/text/test_bleu.py @@ -0,0 +1,141 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import partial + +import pytest +from nltk.translate.bleu_score import SmoothingFunction, corpus_bleu +from paddleext.torchapi import tensor + +from tests.text.helpers import INPUT_ORDER, TextTester +from paddlemetrics.functional.text.bleu import bleu_score +from paddlemetrics.text.bleu import BLEUScore + +# example taken from +# https://www.nltk.org/api/nltk.translate.html?highlight=bleu%20score#nltk.translate.bleu_score.corpus_bleu +# EXAMPLE 1 +HYPOTHESIS_A = tuple( + "It is a guide to action which ensures that the military always obeys the commands of the party".split() +) +REFERENCE_1A = tuple("It is a guide to action that ensures that the military will forever heed Party commands".split()) +REFERENCE_2A = tuple( + "It is a guiding principle which makes the military forces always being under the command of the Party".split() +) +REFERENCE_3A = tuple("It is the practical guide for the army always to heed the directions of the party".split()) + +# EXAMPLE 2 +HYPOTHESIS_B = tuple("he read the book because he was interested in world history".split()) +REFERENCE_1B = tuple("he was interested in world history because he read the book".split()) + +# EXAMPLE 3 +HYPOTHESIS_C = tuple("the cat the cat on the mat".split()) +REFERENCE_1C = tuple("the cat is on the mat".split()) +REFERENCE_2C = tuple("there is a cat on the mat".split()) + +TUPLE_OF_REFERENCES = ( + ((REFERENCE_1A, REFERENCE_2A, REFERENCE_3A), tuple([REFERENCE_1B])), + (tuple([REFERENCE_1B]), (REFERENCE_1C, REFERENCE_2C)), +) +TUPLE_OF_HYPOTHESES = ((HYPOTHESIS_A, HYPOTHESIS_B), (HYPOTHESIS_B, HYPOTHESIS_C)) + +BATCHES = {"preds": TUPLE_OF_HYPOTHESES, "targets": TUPLE_OF_REFERENCES} + +# https://www.nltk.org/api/nltk.translate.html?highlight=bleu%20score#nltk.translate.bleu_score.SmoothingFunction +smooth_func = SmoothingFunction().method2 + + +@pytest.mark.parametrize( + ["weights", "n_gram", "smooth_func", "smooth"], + [ + pytest.param([1], 1, None, False), + pytest.param([0.5, 0.5], 2, smooth_func, True), + pytest.param([0.333333, 0.333333, 0.333333], 3, None, False), + pytest.param([0.25, 0.25, 0.25, 0.25], 4, smooth_func, True), + ], +) +@pytest.mark.parametrize( + ["preds", "targets"], + [ + pytest.param(BATCHES["preds"], BATCHES["targets"]), + ], +) +class TestBLEUScore(TextTester): + @pytest.mark.parametrize("ddp", [False, True]) + @pytest.mark.parametrize("dist_sync_on_step", [False, True]) + def test_bleu_score_class(self, ddp, dist_sync_on_step, preds, targets, weights, n_gram, smooth_func, smooth): + metric_args = {"n_gram": n_gram, "smooth": smooth} + + nltk_metric = partial(corpus_bleu, weights=weights, smoothing_function=smooth_func) + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + targets=targets, + metric_class=BLEUScore, + sk_metric=nltk_metric, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + input_order=INPUT_ORDER.TARGETS_FIRST, + ) + + def test_bleu_score_functional(self, preds, targets, weights, n_gram, smooth_func, smooth): + metric_args = {"n_gram": n_gram, "smooth": smooth} + nltk_metric = partial(corpus_bleu, weights=weights, smoothing_function=smooth_func) + + self.run_functional_metric_test( + preds, + targets, + metric_functional=bleu_score, + sk_metric=nltk_metric, + metric_args=metric_args, + input_order=INPUT_ORDER.TARGETS_FIRST, + ) + + def test_bleu_score_differentiability(self, preds, targets, weights, n_gram, smooth_func, smooth): + metric_args = {"n_gram": n_gram, "smooth": smooth} + + self.run_differentiability_test( + preds=preds, + 
targets=targets, + metric_module=BLEUScore, + metric_functional=bleu_score, + metric_args=metric_args, + input_order=INPUT_ORDER.TARGETS_FIRST, + ) + + +def test_bleu_empty_functional(): + hyp = [[]] + ref = [[[]]] + assert bleu_score(ref, hyp) == tensor(0.0) + + +def test_no_4_gram_functional(): + hyps = [["My", "full", "pytorch-lightning"]] + refs = [[["My", "full", "pytorch-lightning", "test"], ["Completely", "Different"]]] + assert bleu_score(refs, hyps) == tensor(0.0) + + +def test_bleu_empty_class(): + bleu = BLEUScore() + hyp = [[]] + ref = [[[]]] + assert bleu(ref, hyp) == tensor(0.0) + + +def test_no_4_gram_class(): + bleu = BLEUScore() + hyps = [["My", "full", "pytorch-lightning"]] + refs = [[["My", "full", "pytorch-lightning", "test"], ["Completely", "Different"]]] + assert bleu(refs, hyps) == tensor(0.0) diff --git a/RE/paddlemetric/src/tests/text/test_rouge.py b/RE/paddlemetric/src/tests/text/test_rouge.py new file mode 100644 index 00000000..4696dcee --- /dev/null +++ b/RE/paddlemetric/src/tests/text/test_rouge.py @@ -0,0 +1,147 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import partial +from typing import List + +import pytest + +from tests.text.helpers import INPUT_ORDER, TextTester +from paddlemetrics.functional.text.rouge import rouge_score +from paddlemetrics.text.rouge import ROUGEScore +from paddlemetrics.utilities.imports import _NLTK_AVAILABLE, _ROUGE_SCORE_AVAILABLE + +if _ROUGE_SCORE_AVAILABLE: + from rouge_score.rouge_scorer import RougeScorer + from rouge_score.scoring import BootstrapAggregator +else: + RougeScorer, BootstrapAggregator = object, object + +ROUGE_KEYS = ("rouge1", "rouge2", "rougeL", "rougeLsum") + +SINGLE_SENTENCE_EXAMPLE_PREDS = "The quick brown fox jumps over the lazy dog" +SINGLE_SENTENCE_EXAMPLE_TARGET = "The quick brown dog jumps on the log." 
+ +PREDS = "My name is John" +TARGETS = "Is your name John" + + +BATCHES_1 = { + "preds": [["the cat was under the bed"], ["the cat was found under the bed"]], + "targets": [["the cat was found under the bed"], ["the tiny little cat was found under the big funny bed "]], +} + + +BATCHES_2 = { + "preds": [["The quick brown fox jumps over the lazy dog"], ["My name is John"]], + "targets": [["The quick brown dog jumps on the log."], ["Is your name John"]], +} + + +def _compute_rouge_score(preds: List[str], targets: List[str], use_stemmer: bool, rouge_level: str, metric: str): + if isinstance(preds, str): + preds = [preds] + if isinstance(targets, str): + targets = [targets] + scorer = RougeScorer(ROUGE_KEYS, use_stemmer=use_stemmer) + aggregator = BootstrapAggregator() + for pred, target in zip(preds, targets): + aggregator.add_scores(scorer.score(target, pred)) + rs_scores = aggregator.aggregate() + rs_result = getattr(rs_scores[rouge_level].mid, metric) + return rs_result + + +@pytest.mark.skipif(not _NLTK_AVAILABLE, reason="test requires nltk") +@pytest.mark.parametrize( + ["pl_rouge_metric_key", "use_stemmer"], + [ + pytest.param("rouge1_precision", True), + pytest.param("rouge1_recall", True), + pytest.param("rouge1_fmeasure", False), + pytest.param("rouge2_precision", False), + pytest.param("rouge2_recall", True), + pytest.param("rouge2_fmeasure", True), + pytest.param("rougeL_precision", False), + pytest.param("rougeL_recall", False), + pytest.param("rougeL_fmeasure", True), + pytest.param("rougeLsum_precision", True), + pytest.param("rougeLsum_recall", False), + pytest.param("rougeLsum_fmeasure", False), + ], +) +@pytest.mark.parametrize( + ["preds", "targets"], + [ + pytest.param(BATCHES_1["preds"], BATCHES_1["targets"]), + pytest.param(BATCHES_2["preds"], BATCHES_2["targets"]), + ], +) +class TestROUGEScore(TextTester): + @pytest.mark.parametrize("ddp", [False, True]) + @pytest.mark.parametrize("dist_sync_on_step", [False, True]) + def test_rouge_score_class(self, ddp, dist_sync_on_step, preds, targets, pl_rouge_metric_key, use_stemmer): + metric_args = {"use_stemmer": use_stemmer} + + rouge_level, metric = pl_rouge_metric_key.split("_") + rouge_metric = partial(_compute_rouge_score, use_stemmer=use_stemmer, rouge_level=rouge_level, metric=metric) + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + targets=targets, + metric_class=ROUGEScore, + sk_metric=rouge_metric, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + input_order=INPUT_ORDER.PREDS_FIRST, + key=pl_rouge_metric_key, + ) + + def test_rouge_score_functional(self, preds, targets, pl_rouge_metric_key, use_stemmer): + metric_args = {"use_stemmer": use_stemmer} + + rouge_level, metric = pl_rouge_metric_key.split("_") + rouge_metric = partial(_compute_rouge_score, use_stemmer=use_stemmer, rouge_level=rouge_level, metric=metric) + + self.run_functional_metric_test( + preds, + targets, + metric_functional=rouge_score, + sk_metric=rouge_metric, + metric_args=metric_args, + input_order=INPUT_ORDER.PREDS_FIRST, + key=pl_rouge_metric_key, + ) + + +def test_rouge_metric_raises_errors_and_warnings(): + """Test that expected warnings and errors are raised.""" + if not _NLTK_AVAILABLE: + with pytest.raises( + ValueError, + match="ROUGE metric requires that nltk is installed." 
+            " Either as `pip install paddlemetrics[text]` or `pip install nltk`",
+        ):
+            ROUGEScore()
+
+
+def test_rouge_metric_wrong_key_value_error():
+    key = ("rouge1", "rouge")
+
+    with pytest.raises(ValueError):
+        ROUGEScore(rouge_keys=key)
+
+    with pytest.raises(ValueError):
+        rouge_score(PREDS, TARGETS, rouge_keys=key)
diff --git a/RE/paddlemetric/src/tests/text/test_sacre_bleu.py b/RE/paddlemetric/src/tests/text/test_sacre_bleu.py
new file mode 100644
index 00000000..289a7d70
--- /dev/null
+++ b/RE/paddlemetric/src/tests/text/test_sacre_bleu.py
@@ -0,0 +1,73 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+import paddleext.torchapi as B
+
+from tests.text.helpers import TextTester
+from paddlemetrics.functional.text.sacre_bleu import sacre_bleu_score
+from paddlemetrics.text.sacre_bleu import SacreBLEUScore
+from paddlemetrics.utilities.imports import _SACREBLEU_AVAILABLE
+
+if _SACREBLEU_AVAILABLE:
+    from sacrebleu.metrics import BLEU
+
+# example taken from https://github.com/mjpost/sacrebleu
+REFERENCES = (
+    # First set of references
+    ("The dog bit the man.", "It was not unexpected.", "The man bit him first."),
+    # Second set of references
+    ("The dog had bit the man.", "No one was surprised.", "The man had bitten the dog."),
+)
+
+HYPOTHESES = ("The dog bit the man.", "It wasn't surprising.", "The man had just bitten him.")
+
+TOKENIZERS = ("none", "13a", "zh", "intl", "char")
+
+ROUND_N_DIGITS = 4
+
+
+def metrics_score_fn(targets, preds, tokenize):
+    metrics_score = sacre_bleu_score(targets, preds, tokenize=tokenize)
+    # rescale to 0-100 and round to 4 decimals to match sacrebleu
+    metrics_score_normed = B.round(100 * metrics_score * 10 ** ROUND_N_DIGITS) / 10 ** ROUND_N_DIGITS
+    return metrics_score_normed
+
+
+@pytest.mark.parametrize(
+    ["preds", "targets"],
+    [
+        (HYPOTHESES, REFERENCES),
+    ],
+)
+@pytest.mark.parametrize("tokenize", TOKENIZERS)
+@pytest.mark.skipif(not _SACREBLEU_AVAILABLE, reason="test requires sacrebleu")
+class TestSacreBLEUScore(TextTester):
+    def test_sacrebleu_score_functional(self, preds, targets, tokenize):
+        sacrebleu_metrics = BLEU(tokenize=tokenize)
+        original_score = B.tensor(round(sacrebleu_metrics.corpus_score(preds, targets).score, ROUND_N_DIGITS))
+
+        metrics_targets = [[ref[i] for ref in targets] for i in range(len(targets[0]))]
+        metrics_score = metrics_score_fn(metrics_targets, preds, tokenize)
+        assert metrics_score == original_score
+
+    def test_sacrebleu_score_metrics(self, preds, targets, tokenize):
+        sacrebleu_metrics = BLEU(tokenize=tokenize)
+        original_score = B.tensor(round(sacrebleu_metrics.corpus_score(preds, targets).score, ROUND_N_DIGITS))
+
+        metrics_targets = [[ref[i] for ref in targets] for i in range(len(targets[0]))]
+        tm_metrics = SacreBLEUScore(tokenize=tokenize)
+        tm_metrics.update(metrics_targets, preds)
+        metrics_score = B.round(100 * tm_metrics.compute() * 10 ** ROUND_N_DIGITS) / 10 ** ROUND_N_DIGITS
+        assert metrics_score == original_score
diff --git a/RE/paddlemetric/src/tests/text/test_wer.py
b/RE/paddlemetric/src/tests/text/test_wer.py new file mode 100644 index 00000000..65a7ca8b --- /dev/null +++ b/RE/paddlemetric/src/tests/text/test_wer.py @@ -0,0 +1,75 @@ +from typing import Callable, List, Union + +import pytest + +from tests.text.helpers import INPUT_ORDER, TextTester +from paddlemetrics.utilities.imports import _JIWER_AVAILABLE + +if _JIWER_AVAILABLE: + from jiwer import compute_measures +else: + compute_measures = Callable + +from paddlemetrics.functional.text.wer import wer +from paddlemetrics.text.wer import WER + +BATCHES_1 = {"preds": [["hello world"], ["what a day"]], "targets": [["hello world"], ["what a wonderful day"]]} + +BATCHES_2 = { + "preds": [ + ["i like python", "what you mean or swallow"], + ["hello duck", "i like python"], + ], + "targets": [ + ["i like monthy python", "what do you mean, african or european swallow"], + ["hello world", "i like monthy python"], + ], +} + + +def _compute_wer_metric_jiwer(prediction: Union[str, List[str]], reference: Union[str, List[str]]): + return compute_measures(reference, prediction)["wer"] + + +@pytest.mark.skipif(not _JIWER_AVAILABLE, reason="test requires jiwer") +@pytest.mark.parametrize( + ["preds", "targets"], + [ + pytest.param(BATCHES_1["preds"], BATCHES_1["targets"]), + pytest.param(BATCHES_2["preds"], BATCHES_2["targets"]), + ], +) +class TestWER(TextTester): + @pytest.mark.parametrize("ddp", [False, True]) + @pytest.mark.parametrize("dist_sync_on_step", [False, True]) + def test_wer_class(self, ddp, dist_sync_on_step, preds, targets): + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + targets=targets, + metric_class=WER, + sk_metric=_compute_wer_metric_jiwer, + dist_sync_on_step=dist_sync_on_step, + input_order=INPUT_ORDER.PREDS_FIRST, + ) + + def test_wer_functional(self, preds, targets): + + self.run_functional_metric_test( + preds, + targets, + metric_functional=wer, + sk_metric=_compute_wer_metric_jiwer, + input_order=INPUT_ORDER.PREDS_FIRST, + ) + + def test_wer_differentiability(self, preds, targets): + + self.run_differentiability_test( + preds=preds, + targets=targets, + metric_module=WER, + metric_functional=wer, + input_order=INPUT_ORDER.PREDS_FIRST, + ) diff --git a/RE/paddlemetric/src/tests/wrappers/__init__.py b/RE/paddlemetric/src/tests/wrappers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/wrappers/test_bootstrapping.py b/RE/paddlemetric/src/tests/wrappers/test_bootstrapping.py new file mode 100644 index 00000000..ec74c4bf --- /dev/null +++ b/RE/paddlemetric/src/tests/wrappers/test_bootstrapping.py @@ -0,0 +1,123 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import operator +from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import mean_squared_error, precision_score, recall_score +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from paddlemetrics import MeanSquaredError, Precision, Recall +from paddlemetrics.utilities import apply_to_collection +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_7 +from paddlemetrics.wrappers.bootstrapping import BootStrapper, _bootstrap_sampler + +seed_all(42) + +_preds = B.randint(10, (10, 32)) +_target = B.randint(10, (10, 32)) + + +class TestBootStrapper(BootStrapper): + """For testing purpose, we subclass the bootstrapper class so we can get the exact permutation the class is + creating.""" + + def update(self, *args) -> None: + self.out = [] + for idx in range(self.num_bootstraps): + size = len(args[0]) + sample_idx = _bootstrap_sampler(size, sampling_strategy=self.sampling_strategy).to(self.device) + new_args = apply_to_collection(args, Tensor, B.index_select, dim=0, index=sample_idx) + self.metrics[idx].update(*new_args) + self.out.append(new_args) + + +def _sample_checker(old_samples, new_samples, op: operator, threshold: int): + found_one = False + for os in old_samples: + cond = op(os, new_samples) + if cond.sum() > threshold: + found_one = True + break + return found_one + + +@pytest.mark.parametrize("sampling_strategy", ["poisson", "multinomial"]) +def test_bootstrap_sampler(sampling_strategy): + """make sure that the bootstrap sampler works as intended.""" + old_samples = B.randn(20, 2) + + # make sure that the new samples are only made up of old samples + idx = _bootstrap_sampler(20, sampling_strategy=sampling_strategy) + new_samples = old_samples[idx] + for ns in new_samples: + assert ns in old_samples + + found_one = _sample_checker(old_samples, new_samples, operator.eq, 2) + assert found_one, "resampling did not work because no samples were sampled twice" + + found_zero = _sample_checker(old_samples, new_samples, operator.ne, 0) + assert found_zero, "resampling did not work because all samples were atleast sampled once" + + +@pytest.mark.parametrize("device", ["cpu", "cuda"]) +@pytest.mark.parametrize("sampling_strategy", ["poisson", "multinomial"]) +@pytest.mark.parametrize( + "metric, sk_metric", + [ + [Precision(average="micro"), partial(precision_score, average="micro")], + [Recall(average="micro"), partial(recall_score, average="micro")], + [MeanSquaredError(), mean_squared_error], + ], +) +def test_bootstrap(device, sampling_strategy, metric, sk_metric): + """Test that the different bootstraps gets updated as we expected and that the compute method works.""" + if device == "cuda" and not B.cuda.is_available(): + pytest.skip("Test with device='cuda' requires gpu") + + _kwargs = {"base_metric": metric, "mean": True, "std": True, "raw": True, "sampling_strategy": sampling_strategy} + if _TORCH_GREATER_EQUAL_1_7: + _kwargs.update(dict(quantile=B.tensor([0.05, 0.95], device=device))) + + bootstrapper = TestBootStrapper(**_kwargs) + bootstrapper.to(device) + + collected_preds = [[] for _ in range(10)] + collected_target = [[] for _ in range(10)] + for p, t in zip(_preds, _target): + p, t = p.to(device), t.to(device) + bootstrapper.update(p, t) + + for i, o in enumerate(bootstrapper.out): + + collected_preds[i].append(o[0]) + collected_target[i].append(o[1]) + + collected_preds = [B.cat(cp).cpu() for cp in collected_preds] + collected_target = [B.cat(ct).cpu() for ct in 
collected_target] + + sk_scores = [sk_metric(ct, cp) for ct, cp in zip(collected_target, collected_preds)] + + output = bootstrapper.compute() + # quantile only avaible for pytorch v1.7 and forward + if _TORCH_GREATER_EQUAL_1_7: + assert np.allclose(output["quantile"][0].cpu(), np.quantile(sk_scores, 0.05)) + assert np.allclose(output["quantile"][1].cpu(), np.quantile(sk_scores, 0.95)) + + assert np.allclose(output["mean"].cpu(), np.mean(sk_scores)) + assert np.allclose(output["std"].cpu(), np.std(sk_scores, ddof=1)) + assert np.allclose(output["raw"].cpu(), sk_scores) diff --git a/RE/paddlemetric/src/tests/wrappers/test_multioutput.py b/RE/paddlemetric/src/tests/wrappers/test_multioutput.py new file mode 100644 index 00000000..421dd722 --- /dev/null +++ b/RE/paddlemetric/src/tests/wrappers/test_multioutput.py @@ -0,0 +1,142 @@ +from collections import namedtuple +from functools import partial +from typing import Any, Callable, Optional + +import pytest +import paddleext.torchapi as B +from sklearn.metrics import accuracy_score +from sklearn.metrics import r2_score as sk_r2score + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, NUM_CLASSES, MetricTester +from paddlemetrics import Metric +from paddlemetrics.classification import Accuracy +from paddlemetrics.regression import R2Score +from paddlemetrics.wrappers.multioutput import MultioutputWrapper + +seed_all(42) + + +class _MultioutputMetric(Metric): + """Test class that allows passing base metric as a class rather than its instantiation to the wrapper.""" + + def __init__( + self, + base_metric_class, + num_outputs: int = 1, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Any = None, + dist_sync_fn: Optional[Callable] = None, + **base_metric_kwargs, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + self.metric = MultioutputWrapper( + base_metric_class( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + **base_metric_kwargs, + ), + num_outputs=num_outputs, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + dist_sync_fn=dist_sync_fn, + ) + + def update(self, preds: B.Tensor, target: B.Tensor) -> None: + """Update the each pair of outputs and predictions.""" + return self.metric.update(preds, target) + + def compute(self) -> B.Tensor: + """Compute the R2 score between each pair of outputs and predictions.""" + return self.metric.compute() + + @B.jit.unused + def forward(self, *args, **kwargs): + """Run forward on the underlying metric.""" + return self.metric(*args, **kwargs) + + def reset(self) -> None: + """Reset the underlying metric state.""" + self.metric.reset() + + +num_targets = 2 + +Input = namedtuple("Input", ["preds", "target"]) + +_multi_target_regression_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), + target=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), +) +_multi_target_classification_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, num_targets), + target=B.randint(NUM_CLASSES, (NUM_BATCHES, BATCH_SIZE, num_targets)), +) + + +def _multi_target_sk_r2score(preds, target, adjusted=0, multioutput="raw_values"): + """Compute R2 score over multiple outputs.""" + sk_preds = preds.view(-1, num_targets).numpy() + sk_target = target.view(-1, num_targets).numpy() + r2_score = 
sk_r2score(sk_target, sk_preds, multioutput=multioutput) + if adjusted != 0: + r2_score = 1 - (1 - r2_score) * (sk_preds.shape[0] - 1) / (sk_preds.shape[0] - adjusted - 1) + return r2_score + + +def _multi_target_sk_accuracy(preds, target, num_outputs): + """Compute accuracy over multiple outputs.""" + accs = [] + for i in range(num_outputs): + accs.append(accuracy_score(B.argmax(preds[:, :, i], dim=1), target[:, i])) + return accs + + +@pytest.mark.parametrize( + "base_metric_class, compare_metric, preds, target, num_outputs, metric_kwargs", + [ + ( + R2Score, + _multi_target_sk_r2score, + _multi_target_regression_inputs.preds, + _multi_target_regression_inputs.target, + num_targets, + {}, + ), + ( + Accuracy, + partial(_multi_target_sk_accuracy, num_outputs=2), + _multi_target_classification_inputs.preds, + _multi_target_classification_inputs.target, + num_targets, + dict(num_classes=NUM_CLASSES), + ), + ], +) +class TestMultioutputWrapper(MetricTester): + """Test the MultioutputWrapper class with regression and classification inner metrics.""" + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_multioutput_wrapper( + self, base_metric_class, compare_metric, preds, target, num_outputs, metric_kwargs, ddp, dist_sync_on_step + ): + """Test that the multioutput wrapper properly slices and computes outputs along the output dimension for + both classification and regression metrics.""" + self.run_class_metric_test( + ddp, + preds, + target, + _MultioutputMetric, + compare_metric, + dist_sync_on_step, + metric_args=dict(num_outputs=num_outputs, base_metric_class=base_metric_class, **metric_kwargs), + ) diff --git a/RE/paddlemetric/src/tests/wrappers/test_tracker.py b/RE/paddlemetric/src/tests/wrappers/test_tracker.py new file mode 100644 index 00000000..07a94eea --- /dev/null +++ b/RE/paddlemetric/src/tests/wrappers/test_tracker.py @@ -0,0 +1,76 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from functools import partial + +import pytest +import paddleext.torchapi as B + +from tests.helpers import seed_all +from paddlemetrics import Accuracy, MeanAbsoluteError, MeanSquaredError, Precision, Recall +from paddlemetrics.wrappers import MetricTracker + +seed_all(42) + + +def test_raises_error_on_wrong_input(): + with pytest.raises(TypeError, match="metric arg need to be an instance of a paddlemetrics metric .*"): + MetricTracker([1, 2, 3]) + + +@pytest.mark.parametrize( + "method, method_input", + [ + ("update", (B.randint(10, (50,)), B.randint(10, (50,)))), + ("forward", (B.randint(10, (50,)), B.randint(10, (50,)))), + ("compute", None), + ], +) +def test_raises_error_if_increment_not_called(method, method_input): + tracker = MetricTracker(Accuracy(num_classes=10)) + with pytest.raises(ValueError, match=f"`{method}` cannot be called before .*"): + if method_input is not None: + getattr(tracker, method)(*method_input) + else: + getattr(tracker, method)() + + +@pytest.mark.parametrize( + "base_metric, metric_input, maximize", + [ + (partial(Accuracy, num_classes=10), (B.randint(10, (50,)), B.randint(10, (50,))), True), + (partial(Precision, num_classes=10), (B.randint(10, (50,)), B.randint(10, (50,))), True), + (partial(Recall, num_classes=10), (B.randint(10, (50,)), B.randint(10, (50,))), True), + (MeanSquaredError, (B.randn(50), B.randn(50)), False), + (MeanAbsoluteError, (B.randn(50), B.randn(50)), False), + ], +) +def test_tracker(base_metric, metric_input, maximize): + tracker = MetricTracker(base_metric(), maximize=maximize) + for i in range(5): + tracker.increment() + # check both update and forward works + for _ in range(5): + tracker.update(*metric_input) + for _ in range(5): + tracker(*metric_input) + + val = tracker.compute() + assert val != 0.0 + assert tracker.n_steps == i + 1 + + assert tracker.n_steps == 5 + assert tracker.compute_all().shape[0] == 5 + val, idx = tracker.best_metric(return_step=True) + assert val != 0.0 + assert idx in list(range(5)) diff --git a/README.md b/README.md index b2b936ee..30fcc4c1 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ * [自然语言处理(Natrual Language Processing)](#自然语言处理) * [知识图谱(Knowledge Graph)](#知识图谱) * [时空数据挖掘(Spatial-Temporal Data-Mining)](#时空数据挖掘) +* [研发效率(Research Efficiency)](#研发效率) ## 计算机视觉 | 任务类型 | 目录 | 简介 | 论文链接 | @@ -68,6 +69,11 @@ | 兴趣点生成 |[P3AC](ST_DM/KDD2020-P3AC)| 具备个性化的前缀嵌入的POI自动生成。 | - | | 区域生成 |[P3AC](ST_DM/GenRegion)| 基于路网进行区域划分的方法, 实现对特定区域基于路网的全划分,区域之间无交叠,无空隙,算法支持对全球的区域划分。| - | +## 研发效率 +| 软件名称 | 目录 | 简介 | +|--------------|------------------------|-------------------------------------------------| +| paddleext | [paddleext](RE/paddleext) | paddle的扩展功能插件,可以让部分pytorch code 无缝运行在paddle平台上。 | +| paddlemetric | [paddlemetric](RE/paddlemetric) | torchmetric 的 paddle迁移版本,目前支持分类测度。 | ## 许可证书 此向导由[PaddlePaddle](https://github.com/PaddlePaddle/Paddle)贡献,受[Apache-2.0 license](LICENSE)许可认证。
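As a quick orientation for the new `RE` packages, here is a minimal usage sketch distilled from the test suites above (for example `test_tracker.py`): it only assumes that `paddleext` and `paddlemetrics` are importable, and it mirrors the classification-metric calls made in those tests rather than documenting a definitive API.

```python
# Minimal sketch based on the tests above; assumes paddleext and paddlemetrics are on PYTHONPATH.
import paddleext.torchapi as B      # paddle backend exposed through the torch-style API
from paddlemetrics import Accuracy

metric = Accuracy(num_classes=10)   # classification metric, as instantiated in test_tracker.py
preds = B.randint(10, (50,))        # fake integer predictions, matching the test inputs
target = B.randint(10, (50,))       # fake integer labels
metric.update(preds, target)        # accumulate statistics for one batch
print(metric.compute())             # aggregate accuracy over all accumulated batches
```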