diff --git a/CC/README.md b/CC/README.md
new file mode 100644
index 00000000..242d07df
--- /dev/null
+++ b/CC/README.md
@@ -0,0 +1,2 @@
+# Cognitive Computing
+
diff --git a/RE/README.md b/RE/README.md
new file mode 100644
index 00000000..b4966c38
--- /dev/null
+++ b/RE/README.md
@@ -0,0 +1,4 @@
+# Research Efficiency
+
+
+
diff --git a/RE/paddleext/CHANGELOG.md b/RE/paddleext/CHANGELOG.md
new file mode 100644
index 00000000..9aba09d6
--- /dev/null
+++ b/RE/paddleext/CHANGELOG.md
@@ -0,0 +1,21 @@
+Changelog
+===
+All notable changes to this project are recorded here. The format is based on [Keep a Changelog].
+
+This project's versioning follows [Semantic Versioning] and [PEP-440].
+
+## [v1.0] - 2022-07-04
+---
+### Added
+- Support the testing of some classification modules for paddlemetric
+### Changed
+
+
+
+
+
+[v1.0]: https://console.cloud.baidu-int.com/devops/icode/repos/baidu/ccl/torch2paddle/commits/7476c4f8477d6161f8d5aaaf78f47d6bee990d42
+
+[Keep a Changelog]: https://keepachangelog.com/zh-CN/1.0.0/
+[Semantic Versioning]: https://semver.org/lang/zh-CN/
+[PEP-440]: https://www.python.org/dev/peps/pep-0440/
diff --git a/RE/paddleext/README.md b/RE/paddleext/README.md
new file mode 100644
index 00000000..03a9162c
--- /dev/null
+++ b/RE/paddleext/README.md
@@ -0,0 +1,103 @@
+# Paddle Extension
+
+Paddle extensions, including implementations of torch APIs.
+
+## Install
+
+* Clone the repo
+* Add the path of the paddleext folder to PYTHONPATH
+
+## Document
+
+### Seamlessly switch the backend between Paddle and PyTorch
+
+* Add the following code to the root __init__.py of your project
+(assuming your project name is PROJECT):
+
+```python
+
+import importlib
+import sys
+import os
+
+BACKEND = os.environ.get('BACKEND', 'paddle')
+
+if BACKEND == "paddle":
+
+    from paddleext import torchapi
+    sys.modules["PROJECT.backend"] = torchapi
+
+    try:
+        import paddlemetrics
+        sys.modules["PROJECT.metrics"] = paddlemetrics
+    except Exception as e:
+        pass
+
+elif BACKEND == "torch":
+    try:
+        import torch
+        import types
+
+        class VirtualModule(types.ModuleType):
+            def __init__(self, module_name, sub_modules):
+
+                super().__init__(module_name)
+                try:
+                    import sys
+                    sys.modules[module_name] = self
+                    self._module_name = module_name
+                    self._sub_modules = sub_modules
+                    for sub_name, module in sub_modules.items():
+                        if sub_name is None:
+                            sys.modules[f"{module_name}"] = module
+                        else:
+                            sys.modules[f"{module_name}.{sub_name}"] = module
+                except ImportError as err:
+                    raise err  # re-raise so a failed registration is not silently swallowed
+
+            def __repr__(self):
+                return "Virtual module for " + self._module_name
+
+            def __getattr__(self, attrname):
+
+                if attrname in self._sub_modules.keys():
+                    import sys
+                    return self._sub_modules[attrname]
+                else:
+                    raise AttributeError(attrname)
+
+
+        import pkgutil
+
+        sub_modules = {None: torch}
+        for module_info in pkgutil.iter_modules(torch.__path__):
+            if not module_info.name.startswith("_"):
+                try:
+                    module = importlib.import_module("torch." + module_info.name)
+                    sub_modules[module_info.name] = module
+                except:
+                    pass
+
+        VirtualModule("PROJECT.backend", sub_modules)
+
+
+    except Exception as e:
+        raise e
+
+    try:
+        import torchmetrics
+
+        sys.modules["PROJECT.metrics"] = torchmetrics
+    except Exception as e:
+        pass
+
+```
+* Set the environment variable BACKEND to "paddle" or "torch" to switch the backend
+* Import the backend module in your code
+
+```python
+import PROJECT.backend as B
+from PROJECT.backend import nn
+import PROJECT.metrics as M
+```
+* Replace all "torch." or "paddle." prefixes with "B." in your code
\ No newline at end of file
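For example, a project module written against this shim only ever imports `PROJECT.backend`. The sketch below is illustrative only and not part of this patch; `PROJECT` and `TinyClassifier` are placeholder names:

```python
# hypothetical PROJECT/model.py -- backend-agnostic sketch (assumes the setup above)
import PROJECT.backend as B
from PROJECT.backend import nn


class TinyClassifier(nn.Module):
    def __init__(self, in_dim=16, n_classes=3):
        super().__init__()
        self.proj = nn.Linear(in_dim, n_classes)

    def forward(self, x):
        # torch-style keyword arguments; under the paddle backend the shim
        # rewrites dim= to axis= before calling into paddle
        return B.softmax(self.proj(x), dim=-1)


if __name__ == "__main__":
    model = TinyClassifier()
    probs = model(B.rand(4, 16))
    values, indices = B.max(probs, 1)  # torch-style (values, indices) pair
    print(probs.shape, indices.shape)
```

Running `BACKEND=paddle python -m PROJECT.model` or `BACKEND=torch python -m PROJECT.model` should then exercise the same code on either framework.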
diff --git a/RE/paddleext/__init__.py b/RE/paddleext/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/RE/paddleext/paddleext/__init__.py b/RE/paddleext/paddleext/__init__.py
new file mode 100644
index 00000000..7fd01739
--- /dev/null
+++ b/RE/paddleext/paddleext/__init__.py
@@ -0,0 +1,3 @@
+
+
+from . import torchapi
\ No newline at end of file
diff --git a/RE/paddleext/paddleext/torchapi/__init__.py b/RE/paddleext/paddleext/torchapi/__init__.py
new file mode 100644
index 00000000..7ba3f89a
--- /dev/null
+++ b/RE/paddleext/paddleext/torchapi/__init__.py
@@ -0,0 +1,74 @@
+import inspect
+import sys
+import types
+from functools import partial
+
+from .core import *
+from .tensor_ import *
+from .functional import *
+from . import sampler
+from . import data
+from . import nn
+from . import distributed
+from . import cuda
+from . import optim
+
+#from . import paddle_func
+
+this_module = sys.modules[__name__]
+
+
+def get_module_attribute(module, *args, **kwargs):
+    # Wrap plain functions in the kwarg-translating delegate and wrap
+    # submodules in ModuleDelegate; classes and other objects pass through.
+
+    obj = object.__getattribute__(module, *args, **kwargs)
+
+    if isinstance(obj, types.FunctionType):
+        if not obj.__module__.startswith("paddleext.torchapi."):
+            return partial(paddle_delegate_func, obj)
+        else:
+            return obj
+    elif isinstance(obj, types.ModuleType):
+        return ModuleDelegate(obj)
+    elif inspect.isclass(obj):
+        return obj
+    else:
+        return obj
+
+class ModuleDelegate(object):
+    def __init__(self, module):
+        self.module = module
+
+    def __getattribute__(self, *args, **kwargs):
+
+        module = object.__getattribute__(self, "module")
+        result = object.__getattribute__(module, *args, **kwargs)
+        if isinstance(result, types.ModuleType):
+            return ModuleDelegate(result)
+        elif isinstance(result, types.FunctionType):
+            if not result.__module__.startswith("paddleext.torchapi."):
+                return partial(paddle_delegate_func, result)
+            else:
+                return result
+        elif inspect.isclass(result):
+            if result.__module__.startswith("paddle."):
+                return make_delegate_class(result)
+            else:
+                return result
+        else:
+            return result
+
+
+    # def __getattr__(self, *args, **kwargs):
+    #     return get_module_attribute(self.module, *args, **kwargs),
+
+    # def __delattr__(self, *args, **kwargs):
+    #     return object.__delattr__(self.module, *args, **kwargs)
+    #
+    # def __dir__(self):
+    #     return dir(self.module)
+
+
+
+sys.modules[__name__] = ModuleDelegate(sys.modules[__name__])
diff --git a/RE/paddleext/paddleext/torchapi/core.py b/RE/paddleext/paddleext/torchapi/core.py
new file mode 100644
index 00000000..f7f1db0a
--- /dev/null
+++ b/RE/paddleext/paddleext/torchapi/core.py
@@ -0,0 +1,115 @@
+"""
+paddle core
+"""
+import sys
+import types
+from functools import partial
+from types import MethodType
+from typing import Any
+
+import paddle
+import random
+import numpy as np
+
+Module = paddle.nn.Layer
+ModuleBase = paddle.nn.Layer
+ModuleDict = paddle.nn.LayerDict
+ModuleList = paddle.nn.LayerList
+device = str
+
+dtype = paddle.dtype
+
+def load_state_dict(module: Module, state_dict, *args, **kwargs):
+    module.set_state_dict(state_dict, *args, **kwargs)
+
+
+Module.load_state_dict = load_state_dict
+
+from paddle import *
+
+def deterministic(seed=0):
+    random.seed(seed)
+    paddle.seed(seed)
+    np.random.seed(seed)
+
+
+import paddle
+
+from paddle import bool, int32, int64, int8, float32, float64, float16
+
+long = paddle.int64
+int = paddle.int32
+float = paddle.float32
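# Editor's note (illustrative comment, not part of the original patch): the
# aliases above and below intentionally shadow Python's builtin `int` and
# `float` inside this module so that torch-flavoured code keeps working under
# the paddle backend, e.g.
#
#     import paddleext.torchapi as B
#     ids = B.zeros(2, 3, dtype=B.long)    # paddle.int64 tensor
#     w = B.tensor(0.5, dtype=B.float)     # paddle.float32 scalar tensor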
+double = paddle.float64 + + +def platform(): + """ + + Returns: + + """ + + return "paddle" + + + +from paddle import no_grad, autograd + +class set_detect_anomaly(object): + r"""Context-manager that sets the anomaly detection for the autograd engine on or off. + ``set_detect_anomaly`` will enable or disable the autograd anomaly detection + based on its argument :attr:`mode`. + It can be used as a context-manager or as a function. + See ``detect_anomaly`` above for details of the anomaly detection behaviour. + Args: + mode (bool): Flag whether to enable anomaly detection (``True``), + or disable (``False``). + """ + + def __init__(self, mode: bool) -> None: + pass + + def __enter__(self) -> None: + pass + + def __exit__(self, *args: Any) -> None: + pass + + +setattr(autograd, "set_detect_anomaly", set_detect_anomaly) + + +def paddle_delegate_func(func, *args, **kwargs): + if "dim" in kwargs: + kwargs["axis"] = kwargs["dim"] + del kwargs["dim"] + + if "device" in kwargs: + del kwargs["device"] + + return func(*args, **kwargs) + +def make_delegate_class(class_): + + class DelegateClass(class_): + def __init__(self, *args, **kwargs): + + if class_.__name__.endswith("Linear"): + if "bias" in kwargs: + kwargs["bias_attr"] = kwargs["bias"] + del kwargs["bias"] + if "weight" in kwargs: + kwargs["weight_attr"] = kwargs["weight"] + del kwargs["weight"] + if class_.__name__.endswith("LayerNorm"): + if "eps" in kwargs: + kwargs["epsilon"] = kwargs["eps"] + del kwargs["eps"] + super().__init__(*args, **kwargs) +# self.__class__ = class_ + + return DelegateClass + + diff --git a/RE/paddleext/paddleext/torchapi/cuda.py b/RE/paddleext/paddleext/torchapi/cuda.py new file mode 100644 index 00000000..23c774c3 --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/cuda.py @@ -0,0 +1,27 @@ + +import paddle + +_initialized=True +def is_available(): + + return paddle.device.cuda.device_count() > 0 + +def manual_seed_all(seed): + paddle.seed(seed) + + +def manual_seed(seed): + paddle.seed(seed) + + +def set_device(device): + return paddle.set_device(device) + + +def empty_cache(): + return + + +def device_count(): + + return paddle.device.cuda.device_count() \ No newline at end of file diff --git a/RE/paddleext/paddleext/torchapi/data.py b/RE/paddleext/paddleext/torchapi/data.py new file mode 100644 index 00000000..bd61f562 --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/data.py @@ -0,0 +1,5 @@ +""" +data for paddle +""" + +from paddle.io import DataLoader, Dataset \ No newline at end of file diff --git a/RE/paddleext/paddleext/torchapi/distributed.py b/RE/paddleext/paddleext/torchapi/distributed.py new file mode 100644 index 00000000..b9bf7698 --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/distributed.py @@ -0,0 +1,17 @@ + +import paddle + + +def is_available(): + return True + +DISTRIBUTED = False + +def is_initialized(): + return DISTRIBUTED + + +def init_process_group(*args, **kwargs): + + pass + diff --git a/RE/paddleext/paddleext/torchapi/functional.py b/RE/paddleext/paddleext/torchapi/functional.py new file mode 100644 index 00000000..8d2ad2d2 --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/functional.py @@ -0,0 +1,485 @@ +import builtins +from collections import namedtuple + +import paddle +from paddle import Tensor +import numpy as np + +from paddle import is_tensor + +from paddle import less_than, less_equal, greater_than, greater_equal, equal + +from paddle.nn.functional import * + +from paddle import arange, ones_like, zeros_like, ones + +from paddle import logical_and, logical_not, 
logical_or, logical_xor + +from paddle import all, any + +from paddle import argmax, argmin + +from paddle import stack + +from paddle import einsum + +from paddle import inverse + +from paddle.linalg import * + + +def max_along_dim(input, dim=None, keepdim=False, *, out=None): + + if dim is None: + result = paddle.max(input) + return paddle.ones([], dtype=result.dtype) * result.item() + + max_val = paddle.max(input, axis=dim, keepdim=keepdim) + max_index = paddle.argmax(input, axis=dim) + + if out is not None: + out[0] = max_val + out[1] = max_index + + return (max_val, max_index) + +def max(input, *args, **kwargs): + + if len(args) == 0: + return max_along_dim(input, **kwargs) + + if isinstance(args[0], (int, list, tuple)): + return max_along_dim(input, *args, **kwargs) + elif isinstance(args[0], Tensor): + return paddle.maximum(input, args[0], *args[1:], **kwargs) + else: + raise Exception(f"unknown parameter combination") + + +def min_along_dim(input, dim=None, keepdim=False, *, out=None): + + if dim is None: + result = paddle.min(input) + return paddle.ones([], dtype=result.dtype) * result.item() + + min_val = paddle.min(input, axis=dim, keepdim=keepdim) + min_index = paddle.argmin(input, axis=dim) + + if out is not None: + out[0] = min_val + out[1] = min_index + + return (min_val, min_index) + + +def min(input, *args, **kwargs): + + if len(args) == 0: + return min_along_dim(input, **kwargs) + + if isinstance(args[0], (int, list, tuple)): + return min_along_dim(input, *args, **kwargs) + elif isinstance(args[0], Tensor): + return paddle.minimum(input, args[0], *args[1:], **kwargs) + else: + raise Exception(f"unknown parameter combination") + + +def lt(a, b): + if np.isscalar(a) or np.isscalar(b): + return a < b + else: + return less_than(a, b) + + +def le(a, b): + if np.isscalar(a) or np.isscalar(b): + return a <= b + else: + return less_equal(a, b) + + +def gt(a, b): + if np.isscalar(a) or np.isscalar(b): + return a > b + else: + return greater_than(a, b) + + +def ge(a, b): + if np.isscalar(a) or np.isscalar(b): + return a >= b + else: + return greater_equal(a, b) + + +def eq(a, b): + if np.isscalar(a) or np.isscalar(b): + return a == b + else: + return equal(a, b) + + +def standardize_dtype(type): + + if type == int: + return paddle.int64 + elif type == float: + return paddle.float32 + + return type + +def empty(*size, dtype=None, device=None): + + if len(size) == 1 and isinstance(size[0], (list, tuple)): + size = size[0] + + dtype = standardize_dtype(dtype) + x = paddle.empty(size, dtype=dtype) + + return x + +def zeros(*size, dtype=None, device=None): + if len(size) == 1 and isinstance(size[0], (list, tuple)): + size = size[0] + + dtype = standardize_dtype(dtype) + x = paddle.zeros(size, dtype=dtype) + # if device is not None: + # x = x.to(device) + return x + + +def ones(*size, dtype=None, device=None): + if len(size) == 1 and isinstance(size[0], (list, tuple)): + size = size[0] + dtype = standardize_dtype(dtype) + x = paddle.ones(size, dtype=dtype) + # if device is not None: + # x = x.to(device) + return x + + +def rand(*size, dtype=None, device=None): + if len(size) == 1 and isinstance(size[0], (list, tuple)): + size = size[0] + dtype = standardize_dtype(dtype) + x = paddle.rand(size, dtype=dtype) + # if device is not None: + # x = x.to(device) + return x + + +def randint(low=None, high=None, size=None, dtype=None, name=None, device=None): + + arg1 = low + arg2 = high + arg3 = size + + dtype = standardize_dtype(dtype) + + if dtype == paddle.int32 or dtype == paddle.int64: + 
int_dtype = dtype + target_dtype = None + else: + int_dtype = None + target_dtype = None + + if arg3 is not None: + assert isinstance(arg3, (list, tuple)) + if low is None and high is not None: + arg1 = high + arg2 = None + result = paddle.randint(low=arg1, high=arg2, shape=arg3, dtype=int_dtype, name=name) + return result.astype(target_dtype) if target_dtype else result + else: + assert isinstance(arg2, (list, tuple)) + result = paddle.randint(low=arg1, high=None, shape=arg2, dtype=int_dtype, name=name) + return result.astype(target_dtype) if target_dtype else result + + +def randn(*size, out=None, dtype=None, device=None): + if len(size) == 1 and isinstance(size[0], (list, tuple)): + size = size[0] + + dtype = standardize_dtype(dtype) + x = paddle.randn(size, dtype=dtype) + + if out is not None: + paddle.assign(x, out) + return out + + return x + + +def manual_seed_all(seed): + paddle.seed(seed) + + +def manual_seed(seed): + paddle.seed(seed) + + +def scalar_dtype(x): + from . import core + return getattr(core, type(x).__name__) + + +def tensor(x, dtype=None, device=None): + if np.isscalar(x): + if dtype is None: + dtype = scalar_dtype(x) + result = paddle.ones([], dtype=dtype) + if np.isnan(x): + result = (result * (-1)).sqrt() + else: + result.fill_(x) + return result + + return paddle.to_tensor(x, dtype=dtype) + +def from_numpy(x): + return paddle.to_tensor(x) + + +cat = paddle.concat + + +# different meaning of scatter +# in tensorflow/ paddle, scatter is : +# for idx, l in enumerate(index): +# output[l] = update[idx] +# in torch, scatter is: +# for i, j, k: +# output[i, j, index[i,j,k]] = update[i, j, k] + + +def sum(x, dim=None, dtype=None, keepdim=False, name=None): + + if x.ndim == 0: + return x + + result = paddle.sum(x, axis=dim, dtype=dtype, keepdim=keepdim, name=name) + + dim_len = 1 if np.isscalar(dim) else x.ndim if dim is None else len(dim) + + if not keepdim and x.ndim == dim_len: + return tensor(result.item(), dtype=result.dtype) + else: + return result + + +def nonzero(input, *, out=None, as_tuple=False): + + result = paddle.nonzero(input, as_tuple=as_tuple) + if not as_tuple: + if out is not None: + paddle.assign(result, out) + return out + else: + return result + else: + assert out is None + return tuple([x.squeeze(-1) for x in result]) + + +def where(condition, x=None, y=None, name=None): + + if x is not None and y is not None: + assert is_tensor(x) or is_tensor(y) + + if np.isscalar(x): + x = paddle.ones_like(condition, dtype=scalar_dtype(x)) * x + if x.ndim == 0: + x = paddle.ones_like(condition, dtype=x.dtype) * x.item() + + if np.isscalar(y): + y = paddle.ones_like(condition, dtype=scalar_dtype(y)) * y + if x.ndim == 0: + y = paddle.ones_like(condition, dtype=y.dtype) * y.item() + + return paddle.where(condition, x, y, name=name) + + elif x is None and y is None: + result = nonzero(condition, as_tuple=True) + + return result + else: + raise Exception("x and y must be None or not None at the sametime") + + +def is_nonzero(input): + + assert paddle.numel(input) == 1 + + return input.item() != 0.0 + + +def allclose(input, other, rtol=1e-05, atol=1e-08, equal_nan=False, name=None): + + if np.isscalar(input): + input = tensor(input) + if np.isscalar(other): + other = tensor(other) + + return paddle.allclose(input.float(), other.float(), rtol=rtol, atol=atol, equal_nan=equal_nan, name=name) + + +def scatter(input: Tensor, dim, index, value) -> Tensor: + if input.ndim == 1: + output = paddle.scatter(input, index, value, overwrite=True) + else: + +# index, selected = 
paddle.unique(index, axis=dim, return_index=True) +# if isinstance(value, Tensor): +# value = paddle.index_select(value, selected, axis=dim) + + grids = [paddle.arange(index.shape[x]) for x in range(index.ndim)] + inner_indexes = list(paddle.meshgrid(*grids)) + inner_indexes[dim] = index + inner_indexes = [x.flatten() for x in inner_indexes] + inner_indexes = paddle.stack(inner_indexes, axis=1) + + value_shape = list(inner_indexes.shape[:-1]) + list(input.shape[inner_indexes.shape[-1]:]) + + if paddle.is_tensor(value): + value = paddle.reshape(value, value_shape) + elif isinstance(value, (builtins.bool, builtins.int, builtins.float, np.integer, np.float32, np.float64)): + value = paddle.full(shape=value_shape, fill_value=value) + else: + raise Exception(f"unknown value type: {type(value)}") + + to_overwrite = paddle.scatter_nd(inner_indexes, value, shape=input.shape) + condition = paddle.scatter_nd(inner_indexes, paddle.ones_like(value), shape=input.shape) + output = paddle.where(condition > 0, to_overwrite.float(), input.float()).cast(input.dtype) + + return output + +def gather(x,dim,index): + index_shape=index.shape + index_flatten=index.flatten() + if dim<0: + dim=len(x.shape)+dim + nd_index=[] + for k in range(len(x.shape)): + if k==dim: + nd_index.append(index_flatten) + else: + reshape_shape=[1]*len(x.shape) + reshape_shape[k]=x.shape[k] + dim_index=paddle.expand( paddle.reshape(paddle.arange(x.shape[k],dtype=index.dtype), reshape_shape), index_shape).flatten() + nd_index.append(dim_index) + + ind2 = paddle.transpose(paddle.stack(nd_index),[1, 0]) + # ind2 = paddle.stack(nd_index).transpose([1, 0]) + paddle_out = paddle.gather_nd(x, ind2).reshape(index_shape) + return paddle_out + + +def scatter_(input: Tensor, dim, index, value): + + output = scatter(input, dim, index, value) + # return output + paddle.assign(output, input) + + return input + + + +def scatter_add(input: Tensor, dim, index, update) -> Tensor: + # donot use scatter with overwrite=False even for 1-d case; + # It does not produce correct result for duplicated indexes + # if input.ndim == 1: + # output = paddle.scatter(input, index, update, overwrite=False) + # else: + if index.ndim > 1: + grids = [paddle.arange(index.shape[x]) for x in range(index.ndim)] + inner_indexes = list(paddle.meshgrid(*grids)) + inner_indexes[dim] = index + else: + inner_indexes = [index] + inner_indexes = [x.flatten() for x in inner_indexes] + inner_indexes = paddle.stack(inner_indexes, axis=1) + + update_shape = list(inner_indexes.shape[:-1]) + list(input.shape[inner_indexes.shape[-1]:]) + update = paddle.reshape(update, update_shape) + output = paddle.scatter_nd_add(input, inner_indexes, update) + + return output + + +def scatter_add_(input: Tensor, dim, index, update) -> Tensor: + output = scatter_add(input, dim, index, update) + paddle.assign(output, input) + # return output + return input + + +def norm(input, p='fro', dim=None, keepdim=False, out=None, dtype=None): + + result = paddle.linalg.norm(input, p, axis=dim, keepdim=keepdim) + if dtype is not None: + result = result.cast(dtype) + + if out is not None: + out.assign(result) + + return result + +def isinf(x, name=None): + if x.dtype == paddle.bool: + return paddle.zeros_like(x, dtype=paddle.bool) + else: + return paddle.isinf(x, name=name) + +def isnan(x, name=None): + if x.dtype == paddle.bool: + return paddle.zeros_like(x, dtype=paddle.bool) + else: + return paddle.isnan(x, name=name) + +def broadcast_to(x, shape, name=None): + + if len(shape) == 1 and shape[0] == 0: + assert 
x.numel() == 1 + return tensor(x.item()) + else: + return paddle.broadcast_to(x, shape, name) + + +def as_tensor(data, dtype=None, device=None): + + return paddle.to_tensor(data, dtype=dtype) + + +TopKResult = namedtuple("TopKResult", ["values", "indices"]) +def topk(input, k, dim=None, largest=True, sorted=True, *, out=None): + + result, indice = paddle.topk(input, k, axis=dim, largest=largest, sorted=sorted) + + if out is not None: + out[0].set_value(result) + out[1].set_value(indice) + + return TopKResult(values=result, indices=indice) + + +def split(tensor, split_size_or_sections, dim=0): + """ + paddle interface is different from pytorch + + Args: + tensor: + split_size_or_sections: + dim: + + Returns: + + """ + if isinstance(split_size_or_sections, int): + sizes = [split_size_or_sections] * (tensor.shape[dim] // split_size_or_sections) + if tensor.shape[dim] % split_size_or_sections != 0: + sizes.append(tensor.shape[dim] % split_size_or_sections) + split_size_or_sections = sizes + + return paddle.split(tensor, split_size_or_sections, axis=dim) \ No newline at end of file diff --git a/RE/paddleext/paddleext/torchapi/machine.py b/RE/paddleext/paddleext/torchapi/machine.py new file mode 100644 index 00000000..bb0ee241 --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/machine.py @@ -0,0 +1,35 @@ +""" +machine for paddle +""" + +import paddle + + +class PaddleTrainer(object): + """ + PaddleTrainer + """ + + def __init__(self, machine, loss, optimizer, + evaluator, *args, **kwargs): + + self.model = paddle.Model(machine) + + self.model.prepare(optimizer=optimizer, loss=loss, + metrics=evaluator) + + def fit(self, train_data_streams): + """ + + Args: + train_dataloader (): + val_dataloaders (): + test_dataloaders (): + + Returns: + + """ + + self.model.fit(train_data_streams.train, eval_data=train_data_streams.dev) + +Trainer = PaddleTrainer \ No newline at end of file diff --git a/RE/paddleext/paddleext/torchapi/metrics.py b/RE/paddleext/paddleext/torchapi/metrics.py new file mode 100644 index 00000000..8d5f431c --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/metrics.py @@ -0,0 +1,7 @@ + +import sys + +import paddlemetrics + +sys.modules[__name__] = paddlemetrics + diff --git a/RE/paddleext/paddleext/torchapi/nn/__init__.py b/RE/paddleext/paddleext/torchapi/nn/__init__.py new file mode 100644 index 00000000..64b48d08 --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/nn/__init__.py @@ -0,0 +1,47 @@ +import paddle +import random +import numpy as np + +ModuleBase = paddle.nn.Layer +ModuleDict = paddle.nn.LayerDict +ModuleList = paddle.nn.LayerList + +from paddle.nn import * + +Conv2d = Conv2D +Conv3d = Conv3D +from . import functional +from paddle.nn import initializer + +from . 
import init + +def Parameter(data, requires_grad=True): + """ + + Args: + data: + requires_grad: + + Returns: + + """ + + param = paddle.create_parameter(data.shape, dtype=data.dtype, default_initializer=initializer.Assign(data)) + + param.stop_gradient = not requires_grad + + return param + +from paddle.fluid import framework + +class Module(paddle.nn.Layer): + """ + Module with add_parameter + """ + + def __setattr__(self, key, value): + + if isinstance(value, framework.Parameter): + self.add_parameter(key, value) + else: + super().__setattr__(key, value) \ No newline at end of file diff --git a/RE/paddleext/paddleext/torchapi/nn/functional.py b/RE/paddleext/paddleext/torchapi/nn/functional.py new file mode 100644 index 00000000..cf489074 --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/nn/functional.py @@ -0,0 +1,15 @@ + + +import paddle +from more_itertools import chunked +from paddle.nn.functional import * + +def pad(input, pad, mode='constant', value=0.0): + + pad = sum(reversed(list(chunked(pad, 2))), []) + + if len(pad) < 2 * input.ndim: + pad = [0] * (2 * input.ndim - len(pad)) + pad + + return paddle.nn.functional.pad(input, pad, mode=mode, value=value) + diff --git a/RE/paddleext/paddleext/torchapi/nn/init.py b/RE/paddleext/paddleext/torchapi/nn/init.py new file mode 100644 index 00000000..4a74a80c --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/nn/init.py @@ -0,0 +1,49 @@ +""" +init function for paddle +""" +import paddle + + +def normal_(tensor, mean=0.0, std=1.0): + """ + + Args: + tensor: + mean: + std: + + Returns: + + """ + + paddle.assign(paddle.normal(mean=mean, std=std, shape=tensor.shape), tensor) + + return tensor + +def zeros_(tensor): + """ + + Args: + tensor: + + Returns: + + """ + + paddle.assign(paddle.zeros_like(tensor), tensor) + + return tensor + +def ones_(tensor): + """ + + Args: + tensor: + + Returns: + + """ + + paddle.assign(paddle.ones_like(tensor), tensor) + + return tensor \ No newline at end of file diff --git a/RE/paddleext/paddleext/torchapi/optim/__init__.py b/RE/paddleext/paddleext/torchapi/optim/__init__.py new file mode 100644 index 00000000..c31d1d6c --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/optim/__init__.py @@ -0,0 +1,48 @@ +from functools import partial + +from paddle.optimizer import * + +from . import lr_scheduler + +# import sys +# this_module = sys.modules[__name__] +# import inspect +# +# def paddle_optim_class_creator(paddle_optim_class, *args, **kwargs): +# """ +# +# Args: +# paddle_optim_class: +# *args: +# **kwargs: +# +# Returns: +# +# """ +# if "params" in kwargs: +# kwargs["parameters"] = kwargs["params"] +# del kwargs["params"] +# if "lr" in kwargs: +# kwargs["learning_rate"] = kwargs["lr"] +# del kwargs["lr"] +# +# return paddle_optim_class(*args, **kwargs) +# +# from . 
import lr +# +# class PaddleOptimModuleProxy(object): +# +# def __getattribute__(self, *args, **kwargs): +# # Perform custom logic here +# +# obj = object.__getattribute__(this_module, *args, **kwargs) +# +# if inspect.isclass(obj) and obj.__module__.startswith("paddle.optimization"): +# print(obj.__module__) +# return partial(paddle_optim_class_creator, obj) +# else: +# return obj +# +# +# +# sys.modules[__name__] = PaddleOptimModuleProxy() \ No newline at end of file diff --git a/RE/paddleext/paddleext/torchapi/optim/lr_scheduler.py b/RE/paddleext/paddleext/torchapi/optim/lr_scheduler.py new file mode 100644 index 00000000..1ba3040a --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/optim/lr_scheduler.py @@ -0,0 +1,34 @@ +from functools import partial + +from paddle.optimizer.lr import * + +StepLR = StepDecay +ExponentialLR = ExponentialDecay + +# +# def paddle_lr_class_creator(paddle_lr_class, *args, **kwargs): +# +# if "lr" in kwargs: +# kwargs["learning_rate"] = kwargs["lr"] +# del kwargs["lr"] +# +# return paddle_lr_class(*args, **kwargs) +# +# import sys +# this_module = sys.modules[__name__] +# import inspect +# +# class PaddleLRModuleProxy(object): +# +# def __getattribute__(self, *args, **kwargs): +# # Perform custom logic here +# +# obj = object.__getattribute__(this_module, *args, **kwargs) +# +# if inspect.isclass(obj) and obj.__module__.startswith("paddle.optimization"): +# print("LR", obj.__module__) +# return partial(paddle_lr_class_creator, obj) +# else: +# return obj +# +# sys.modules[__name__] = PaddleLRModuleProxy() \ No newline at end of file diff --git a/RE/paddleext/paddleext/torchapi/sampler.py b/RE/paddleext/paddleext/torchapi/sampler.py new file mode 100644 index 00000000..7e4f1cb5 --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/sampler.py @@ -0,0 +1,154 @@ +""" +paddle operation for sampler +""" +from typing import Iterator, Optional, Sequence, List, TypeVar, Generic, Sized + +import paddle + +from paddle.io import Sampler, DistributedBatchSampler, SequenceSampler, RandomSampler +import math + + +def identity(x): + """ + + Args: + x (): + + Returns: + + """ + return x + + + +class BatchSampler(Sampler): + r"""Wraps another sampler to yield a mini-batch of indices. + + Args: + sampler (Sampler or Iterable): Base sampler. Can be any iterable object + batch_size (int): Size of mini-batch. + drop_last (bool): If ``True``, the sampler will drop the last batch if + its size would be less than ``batch_size`` + + Example: + >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=False)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] + >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=True)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8]] + """ + + def __init__(self, sampler: Sampler, batch_size: int, drop_last: bool) -> None: + # Since collections.abc.Iterable does not check for `__getitem__`, which + # is one way for an object to be an iterable, we don't do an `isinstance` + # check here. 
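        # Editor's note (illustrative comment, not part of the original patch):
        # paddle.io.Sampler is initialised without a data_source because this
        # wrapper only re-batches the indices produced by `sampler`; length and
        # ordering are fully delegated to that inner sampler, mirroring
        # torch.utils.data.BatchSampler.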
+ super().__init__() + if not isinstance(batch_size, int) or isinstance(batch_size, bool) or \ + batch_size <= 0: + raise ValueError("batch_size should be a positive integer value, " + "but got batch_size={}".format(batch_size)) + if not isinstance(drop_last, bool): + raise ValueError("drop_last should be a boolean value, but got " + "drop_last={}".format(drop_last)) + self.sampler = sampler + self.batch_size = batch_size + self.drop_last = drop_last + + + def __iter__(self) -> Iterator[List[int]]: + batch = [] + for idx in self.sampler: + batch.append(idx) + if len(batch) == self.batch_size: + yield batch + batch = [] + if len(batch) > 0 and not self.drop_last: + yield batch + + def __len__(self) -> int: + # Can only be called if self.sampler has __len__ implemented + # We cannot enforce this condition, so we turn off typechecking for the + # implementation below. + # Somewhat related: see NOTE [ Lack of Default `__len__` in Python Abstract Base Classes ] + if self.drop_last: + return len(self.sampler) // self.batch_size # type: ignore[arg-type] + else: + return (len(self.sampler) + self.batch_size - 1) // self.batch_size # type: ignore[arg-type] + + + +class SequentialSampler(Sampler): + r"""Samples elements sequentially, always in the same order. + + Args: + data_source (Dataset): dataset to sample from + """ + data_source: Sized + + def __init__(self, data_source: Sized) -> None: + self.data_source = data_source + + def __iter__(self) -> Iterator[int]: + return iter(range(len(self.data_source))) + + def __len__(self) -> int: + return len(self.data_source) + +class SubsetRandomSampler(Sampler): + r"""Samples elements randomly from a given list of indices, without replacement. + + Arguments: + indices (sequence): a sequence of indices + generator (Generator): Generator used in sampling. + """ + indices: Sequence[int] + + def __init__(self, indices: Sequence[int]) -> None: + self.indices = indices + + def __iter__(self): + return (self.indices[i] for i in paddle.randperm(len(self.indices))) + + def __len__(self): + return len(self.indices) + + + +class DistributedSampler(Sampler): + """ Iterable wrapper that distributes data across multiple workers. + + Args: + iterable (iterable) + num_replicas (int, optional): Number of processes participating in distributed training. + rank (int, optional): Rank of the current process within ``num_replicas``. 
+ + Example: + >>> list(DistributedSampler(range(10), num_replicas=2, rank=0)) + [0, 2, 4, 6, 8] + >>> list(DistributedSampler(range(10), num_replicas=2, rank=1)) + [1, 3, 5, 7, 9] + """ + + def __init__(self, iterable, num_replicas=None, rank=None): + self.iterable = iterable + self.num_replicas = num_replicas + self.rank = rank + + if num_replicas is None or rank is None: # pragma: no cover +# if not paddle.distributed.is_initialized(): +# raise RuntimeError('Requires `torch.distributed` to be initialized.') + + self.num_replicas = ( + paddle.distributed.get_world_size() if num_replicas is None else num_replicas) + self.rank = paddle.distributed.get_rank() if rank is None else rank + + if self.rank >= self.num_replicas: + raise IndexError('`rank` must be smaller than the `num_replicas`.') + + def __iter__(self): + return iter( + [e for i, e in enumerate(self.iterable) if (i - self.rank) % self.num_replicas == 0]) + + def __len__(self): + return len(self.iterable) + diff --git a/RE/paddleext/paddleext/torchapi/tensor_.py b/RE/paddleext/paddleext/torchapi/tensor_.py new file mode 100644 index 00000000..838c75a8 --- /dev/null +++ b/RE/paddleext/paddleext/torchapi/tensor_.py @@ -0,0 +1,547 @@ +""" +paddle tensor +""" +from functools import partial + +import numpy as np +from collections.abc import Iterable + +from . import paddle_delegate_func +from .functional import * +import paddle + +""" +paddle tensor +""" +import types +import paddle +from paddle import Tensor + +# just for type hint. If there are statements like isinstance(x, FloatTensor), this may cause error +FloatTensor = Tensor + +def size(self, dim=None): + shape = self.shape + if dim is None: + return shape + else: + return shape[dim] + + +# def __new__(cls, *args, **kwargs): +# +# obj = cls.__default_new__(cls, *args, **kwargs) +# +# setattr(obj, "size", types.MethodType(size, obj)) +# +# return obj +# +# setattr(Tensor, "__default_new__", Tensor.__new__) +# setattr(Tensor, "__new__", __new__) + + +def bool_(self): + return self.astype("bool") + +def float_(self): + return self.astype('float32') + + +def double_(self): + return self.astype("float64") + + +def int_(self): + return self.astype("int32") + + +def long_(self): + return self.astype('int64') + + +def expand(self, *sizes): + if isinstance(sizes[0], Iterable): + sizes = sizes[0] + ##handle -1 case + if len(sizes) > len(self.shape): + for _ in range(len(sizes) - len(self.shape)): + self = self.unsqueeze(dim=0) + expand_times = [x // y if x >= y else 1 for x, y in zip(sizes, self.shape)] + x = paddle.fluid.layers.expand(self, expand_times, name=None) + return x + + +def masked_fill(self, mask, value): + if self.ndim == 0: + assert mask.ndim == 0 + if mask.item(): + return paddle.full([], value, self.dtype) + else: + return self + + y = paddle.full(self.shape, value, self.dtype) + mask_shape = [1] * (self.ndim - mask.ndim) + mask.shape + mask = paddle.reshape(mask, mask_shape) + mask = paddle.expand_as(mask, self) + new_values = paddle.where(mask, y, self) + return new_values + # mask_float = mask.astype("float32") + # if self.dtype == paddle.bool: + # self_float = self.astype("float32") + # else: + # self_float = self + # result = self_float * (1 - mask_float) + mask_float * value + # if self.dtype == paddle.bool: + # result = result.astype(paddle.bool) + # return result + +# def masked_fill_(self, mask, value): +# +# new_values = masked_fill(self, mask, value) +# paddle.assign(new_values, self) +# +# return self + + +def to(self, arg): + if isinstance(arg, paddle.dtype): 
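        # Editor's note (illustrative comment, not part of the original patch):
        # torch's Tensor.to() accepts a dtype, another tensor, or a device. Only
        # the two dtype-like cases are honoured here; device arguments fall
        # through to the final `return self`, since device placement under the
        # paddle backend is set globally (e.g. via cuda.set_device in this
        # package) rather than per tensor.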
+ return self.astype(arg) + elif isinstance(arg, Tensor): + return self.astype(arg.dtype) + else: + return self + +def is_floating_point(self): + return self.dtype in {paddle.float16, paddle.float32, paddle.float64} + + +def reshape(self, *size): + + if len(size) == 1 and isinstance(size[0], Iterable): + size = size[0] + + return paddle.reshape(self, size) + + +def view(self, *size): + if len(size) == 1 and isinstance(size[0], Iterable): + size = size[0] + + return reshape(self, size) + +def view_as(self, other): + + return view(self, *other.size()) + + +Tensor.__native__size = Tensor.size + +Tensor.device = None +Tensor.float = float_ +Tensor.double = double_ +Tensor.int = int_ +Tensor.long = long_ +Tensor.bool = bool_ +Tensor.scatter_explicit_index = Tensor.scatter +Tensor.scatter = scatter +Tensor.scatter_explicit_index_ = Tensor.scatter_ +Tensor.scatter_ = scatter_ +Tensor.scatter_add = scatter_add +Tensor.scatter_add_ = scatter_add_ +Tensor.expand = expand +Tensor.masked_fill = masked_fill +#Tensor.masked_fill_ = masked_fill_ +Tensor.to = to +Tensor.is_floating_point = is_floating_point +Tensor.reshape = reshape +Tensor.view = view +Tensor.view_as = view_as + +Tensor.__invert__ = paddle.logical_not + +Tensor.__native__numel = Tensor.numel +def numel(x): + return x.__native__numel().item() + +Tensor.numel = numel + +import math + +class SizeObject(int): + + def __new__(cls, sizes, *args, **kwargs): + size = int(math.prod(sizes)) + instance = int.__new__(cls, size, *args, **kwargs) + instance.sizes = sizes + return instance + + def __call__(self, index=None): + if index is None: + return self.sizes + else: + return self.sizes[index] + +Tensor.size = property(lambda self: SizeObject(self.shape)) + + +def flatten(self, *args, **kwargs): + + if self.dtype == paddle.bool: + return flatten(self.int(), *args, **kwargs) > 0 + else: + return paddle.flatten(self, *args, **kwargs) + +Tensor.flatten = flatten + + +Tensor.__getitem__official__ = Tensor.__getitem__ + +import builtins + +def getitem(self, args): + + if self.dtype == paddle.bool: + return getitem(self.int(), args) > 0 + + if isinstance(args, (list, tuple)): + ellipsis_num = builtins.sum(x is Ellipsis for x in args) + if ellipsis_num > 1: + raise Exception(f"multiple ellipsis found in args: {args}") + elif ellipsis_num == 1: + args = list(args) + ellips_idx = args.index(Ellipsis) + args_before_ellips = args[:ellips_idx] + args_after_ellips = args[ellips_idx+1:] + ommited_dims = [builtins.slice(None, None, None) for _ in range(self.ndim - len(args) + 1)] + args = tuple(args_before_ellips + ommited_dims + args_after_ellips) + + return self.__getitem__official__(args) + + elif isinstance(args, Tensor): + if args.dtype == paddle.bool and args.ndim > 1: + # paddle do not support boolean indexing with ndim > 1 + return self.flatten(start_axis=0, stop_axis=args.ndim-1)[args.flatten().nonzero()] + if args.ndim == 0: + assert args.dtype == paddle.bool + assert self.ndim == 0 + return tensor(self.reshape((1,))[args.reshape((1,))].item(), dtype=self.dtype) + + return self.__getitem__official__(args) + +Tensor.__getitem__ = getitem + +Tensor.__setitem__official__ = Tensor.__setitem__ + +def setitem(self, index, value): + + if isinstance(index, Tensor): + if self.ndim == 0: + index = index.item() + assert type(index) == bool + if index: + self.fill_(value) + return + + if index.dtype == paddle.bool and (paddle.any(paddle.isnan(self)) or paddle.any(paddle.isinf(self))): + + result = masked_fill(self, index, value) + self.set_value(result) + return + 
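    # Editor's note (illustrative comment, not part of the original patch): every
    # case not intercepted above is forwarded to paddle's native __setitem__. The
    # masked_fill branch above appears to work around unreliable boolean-mask
    # assignment on tensors that already contain nan/inf values (inferred from
    # the guard, not verified against paddle itself).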
+ self.__setitem__official__(index, value) + +Tensor.__setitem__ = setitem + +def getattribute(self, *args, **kwargs): + # Perform custom logic here + + obj = object.__getattribute__(self, *args, **kwargs) + + if isinstance(obj, types.MethodType) and not obj.__module__.startswith("paddleext.torchapi."): + + return partial(paddle_delegate_func, obj) + else: + return obj + + +Tensor.__getattribute__ = getattribute + +Tensor.sum = sum + + + +def permute(self, *perm): + + if len(perm) == 1 and isinstance(perm[0], Iterable): + perm = perm[0] + + assert len(perm) == self.ndim + perm = [self.ndim + x if x < 0 else x for x in perm] ##not allow negative values + + if self.dtype == paddle.bool: + return permute(self.int(), * perm) > 0 + else: + return paddle.transpose(self, perm) + +Tensor.permute = permute + + +def transpose(self, *perm): + # if len(perm)==2 and len(self.shape)>2: + if isinstance(perm[0], Iterable): + assert len(perm) == 1 + perm = perm[0] + + if len(perm) == 2 and len(perm) < self.ndim: + + perm = [self.ndim + x if x < 0 else x for x in perm] + dim1, dim2 = perm + perm = list(range(self.rank())) + perm[dim1] = dim2 + perm[dim2] = dim1 + + return self.permute(*perm) + else: + return paddle.transpose(self, perm) + + +Tensor.transpose = transpose + +def contiguous(self): + return self + +Tensor.contiguous = contiguous + + +Tensor.__lt__origin__ = Tensor.__lt__ +def __lt__(self, other): + if self.ndim == 0 and np.isscalar(other): + other = tensor(other) + return self.__lt__origin__(other) +Tensor.__lt__ = __lt__ + + +Tensor.__le__origin__ = Tensor.__le__ +def __le__(self, other): + if self.ndim == 0 and np.isscalar(other): + other = tensor(other) + return self.__le__origin__(other) +Tensor.__le__ = __le__ + + +Tensor.__gt__origin__ = Tensor.__gt__ +def __gt__(self, other): + if self.ndim == 0 and np.isscalar(other): + other = tensor(other) + return self.__gt__origin__(other) +Tensor.__gt__ = __gt__ + + +Tensor.__ge__origin__ = Tensor.__ge__ +def __ge__(self, other): + if self.ndim == 0 and np.isscalar(other): + other = tensor(other) + return self.__ge__origin__(other) +Tensor.__ge__ = __ge__ + + +Tensor.__eq__origin__ = Tensor.__eq__ +def __eq__(self, other): + if self.ndim == 0 and np.isscalar(other): + other = tensor(other) + return self.__eq__origin__(other) +Tensor.__eq__ = __eq__ + + +Tensor.__ne__origin__ = Tensor.__ne__ +def __ne__(self, other): + if self.ndim == 0 and np.isscalar(other): + other = tensor(other) + return self.__ne__origin__(other) +Tensor.__ne__ = __ne__ + + +def __or__(self, other): + return paddle.logical_or(self.bool(), other.bool()) +Tensor.__or__ = __or__ + +def __and__(self, other): + return paddle.logical_or(self.bool(), other.bool()) +Tensor.__and__ = __and__ + + +Tensor.__native__any = Tensor.any +def any(x, dim=None, keepdim=False, name=None): + if isinstance(x, Tensor) and x.ndim == 0: + assert dim is None + return x + else: + return x.__native__any(axis=dim, keepdim=keepdim, name=name) + +Tensor.any = any + +Tensor.__native__all = Tensor.all +def all(x, dim=None, keepdim=False, name=None): + + if isinstance(x, Tensor) and x.ndim == 0: + assert dim is None + return x + else: + return x.__native__all(axis=dim, keepdim=keepdim, name=name) + +Tensor.all = all + +Tensor.__native__add__ = Tensor.__add__ +#Tensor.__native__iadd__ = Tensor.__iadd__ +def add(x, y): + + tensor_out = isinstance(x, Tensor) or isinstance(y, Tensor) + + out_dtype = x.dtype if isinstance(x, Tensor) else y.dtype if isinstance(y, Tensor) else None + + if isinstance(x, Tensor) and 
x.ndim == 0: + x = x.item() + if isinstance(y, Tensor) and y.ndim == 0: + y = y.item() + + if isinstance(x, Tensor): + return Tensor.__native__add__(x, y) + elif isinstance(y, Tensor): + return Tensor.__native__add__(y, x) + else: + result = x + y + if np.isscalar(result) and tensor_out: + return tensor(result, dtype=out_dtype) + else: + return result + + +# def iadd(x, y): +# if isinstance(y, Tensor) and y.ndim == 0: +# y = y.item() +# +# return Tensor.__native__iadd__(x, y) + +Tensor.__add__ = add +Tensor.__radd__ = add +# Tensor.__iadd__ = iadd + +Tensor.__native__sub__ = Tensor.__sub__ +Tensor.__native__rsub__ = Tensor.__rsub__ + +def subtract(x, y): + tensor_out = isinstance(x, Tensor) or isinstance(y, Tensor) + + out_dtype = x.dtype if isinstance(x, Tensor) else y.dtype if isinstance(y, Tensor) else None + + if isinstance(x, Tensor) and x.ndim == 0: + x = x.item() + if isinstance(y, Tensor) and y.ndim == 0: + y = y.item() + + if isinstance(x, Tensor): + return Tensor.__native__sub__(x, y) + elif isinstance(y, Tensor): + return Tensor.__native__rsub__(y, x) + else: + result = x - y + if np.isscalar(result) and tensor_out: + return tensor(result, dtype=out_dtype) + else: + return result + +def rsub(x, y): + + if isinstance(y, Tensor) and y.ndim == 0: + y = y.item() + + return Tensor.__native__rsub__(x, y) + + +Tensor.__sub__ = subtract +Tensor.__rsub__ = rsub + +Tensor.__native__mul__ = Tensor.__mul__ +def multiply(x, y): + tensor_out = isinstance(x, Tensor) or isinstance(y, Tensor) + + out_dtype = x.dtype if isinstance(x, Tensor) else y.dtype if isinstance(y, Tensor) else None + + if isinstance(x, Tensor) and x.ndim == 0: + x = x.item() + if isinstance(y, Tensor) and y.ndim == 0: + y = y.item() + + if isinstance(x, Tensor): + return Tensor.__native__mul__(x, y) + elif isinstance(y, Tensor): + return Tensor.__native__mul__(y, x) + else: + result = x * y + if np.isscalar(result) and tensor_out: + return tensor(result, dtype=out_dtype) + else: + return result + +Tensor.__mul__ = multiply +Tensor.__rmul__ = multiply + +Tensor.__native__truediv__ = Tensor.__truediv__ +Tensor.__native__rdiv__ = Tensor.__rdiv__ + +def divide(x, y): + tensor_out = isinstance(x, Tensor) or isinstance(y, Tensor) + + out_dtype = x.dtype if isinstance(x, Tensor) else y.dtype if isinstance(y, Tensor) else None + + if isinstance(x, Tensor) and x.ndim == 0: + x = x.item() + if isinstance(y, Tensor) and y.ndim == 0: + y = y.item() + + if isinstance(x, Tensor): + return Tensor.__native__truediv__(x, y) + elif isinstance(y, Tensor): + return Tensor.__native__rdiv__(y, x) + else: + result = x / y + if np.isscalar(result) and tensor_out: + return tensor(result, dtype=out_dtype) + else: + return result + +def rdiv(x, y): + if isinstance(y, Tensor) and y.ndim == 0: + y = y.item() + + return Tensor.__native__rdiv__(x, y) + + +Tensor.__truediv__ = divide +Tensor.__rdiv__ = rdiv + + +def __getstate__(self): + state = {"dtype": self.dtype, "value": self.numpy()} + return state + + +def __setstate__(self, newstate): + + loaded = paddle.to_tensor(newstate["value"], dtype=newstate["dtype"]) + self.set_value(loaded) + +Tensor.__getstate__ = __getstate__ +Tensor.__setstate__ = __setstate__ + +## requires_grad property + +def getter(x): + return not x.stop_gradient + +def setter(x, value): + x.stop_gradient = not value + +Tensor.requires_grad = property(getter, setter) + +Tensor.topk = topk \ No newline at end of file diff --git a/RE/paddleext/setup.py b/RE/paddleext/setup.py new file mode 100644 index 00000000..d0e29ea7 --- 
/dev/null +++ b/RE/paddleext/setup.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +################################################################################ +# +# Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved +# +################################################################################ +""" +Setup script. + +Authors: sunmingming01(sunmingming01@baidu.com) +Date: 2020/12/31 12:33:34 +""" + +from setuptools import setup, find_packages + +with open('README.md') as readme_file: + README = readme_file.read() + +setup_args = dict( + name='paddle-extension', + version='1.0.0-beta', + description='Paddle extensions, including implementation for torch apis.', + long_description_content_type="text/markdown", + long_description=README, + license='Apache', + packages=find_packages(include=["paddleext", "paddleext.*"]), + author='Mingming Sun', + author_email='sunmingming01@baidu.com', + keywords=['Deep Learning', 'Paddlepaddle'], + url='', + download_url='' +) + +install_requires = [ +] + +if __name__ == '__main__': + setup(**setup_args, install_requires=install_requires) \ No newline at end of file diff --git a/RE/paddleext/test/__init__.py b/RE/paddleext/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddleext/test/test_diagonal.py b/RE/paddleext/test/test_diagonal.py new file mode 100644 index 00000000..80714ed1 --- /dev/null +++ b/RE/paddleext/test/test_diagonal.py @@ -0,0 +1,32 @@ + +import numpy as np + +from chaos import paddle_ as paddle, torch_ as torch + + +def test_diagonal(): + import random + + for rank in range(2, 6): + for test in range(10): + while True: + dim1 = random.randint(0, rank - 1) + dim2 = random.randint(0, rank - 1) + if dim1 != dim2: + break + + shape = [random.randint(5, 10) for _ in range(rank)] + offset = random.randint(-shape[dim1] + 1, shape[dim2]) + + x = np.random.rand(*shape) + + torch_input = torch.from_numpy(x) + torch.fill_diagonal(torch_input, value=100, offset=offset, dim1=dim1, dim2=dim2) + + paddle_input = paddle.from_numpy(x) + paddle.fill_diagonal(paddle_input, value=100, offset=offset, dim1=dim1, dim2=dim2) + + paddle_out = paddle_input.numpy() + torch_out = torch_input.numpy() + + assert np.sum(np.abs(paddle_out - torch_out)) < 1e-5 \ No newline at end of file diff --git a/RE/paddleext/test/test_function.py b/RE/paddleext/test/test_function.py new file mode 100644 index 00000000..db676d1b --- /dev/null +++ b/RE/paddleext/test/test_function.py @@ -0,0 +1,13 @@ + + + +from chaos.backend_.paddle_.functional import fill_diagonal +import paddle + +def test_fill_diagnonal(): + + a = paddle.randn((5, 5)) + fill_diagonal(a, float("-inf")) + +if __name__ == "__main__": + test_fill_diagnonal() \ No newline at end of file diff --git a/RE/paddleext/test/test_pad.py b/RE/paddleext/test/test_pad.py new file mode 100644 index 00000000..6fb0e2bc --- /dev/null +++ b/RE/paddleext/test/test_pad.py @@ -0,0 +1,29 @@ + +import numpy as np + +from chaos.backend_ import paddle_ as paddle, torch_ as torch + + +def test_pad(): + import random + + for ndim in range(2, 6): + for test in range(5): + shape = [random.randint(5, 10) for _ in range(ndim)] + x = np.random.rand(*shape) + + torch_input = torch.from_numpy(x) + paddle_input = paddle.from_numpy(x) + + for rank in range(1, ndim + 1): + + pad = [random.randint(0, 10) for _ in range(rank)] + [random.randint(0, 10) for _ in range(rank)] + + torch_output = torch.nn.functional.pad(torch_input, pad, mode='constant', value=0.0) + + paddle_output = 
paddle.nn.functional.pad(paddle_input, pad, mode='constant', value=0.0) + + paddle_out = paddle_output.numpy() + torch_out = torch_output.numpy() + + assert np.allclose(paddle_out, torch_out) \ No newline at end of file diff --git a/RE/paddleext/test/test_scatter.py b/RE/paddleext/test/test_scatter.py new file mode 100644 index 00000000..52c2d29a --- /dev/null +++ b/RE/paddleext/test/test_scatter.py @@ -0,0 +1,146 @@ +from chaos.backend_ import paddle_ as paddle, torch_ as torch +import numpy as np + +def test_scatter_1d(): + + x = np.random.rand(100) + + indices = np.random.randint(low=0, high=100, size=50) + updates = np.random.rand(50) + + paddle_out = paddle.scatter(paddle.from_numpy(x), 0, paddle.from_numpy(indices), paddle.from_numpy(updates)) + torch_out = torch.scatter(torch.from_numpy(x), 0, torch.from_numpy(indices), torch.from_numpy(updates)) + + paddle_out = paddle_out.numpy() + torch_out = torch_out.numpy() + + assert np.all(paddle_out == torch_out) + + +def test_scatter_2d_dim0(): + + dim0 = 101 + dim1 = 31 + x = np.random.rand(dim0, dim1) + + # for dim = 0 + + import random + + indices = list(range(dim0)) + random.shuffle(indices) + indices = np.array(indices[:50]).reshape((25, 2)) + updates = np.random.rand(indices.shape[0], 2) + + torch_out = torch.scatter(torch.from_numpy(x), 0, torch.from_numpy(indices), torch.from_numpy(updates)) + paddle_out = paddle.scatter(paddle.from_numpy(x), 0, paddle.from_numpy(indices), paddle.from_numpy(updates)) + + paddle_out = paddle_out.numpy() + torch_out = torch_out.numpy() + + assert np.allclose(paddle_out, torch_out) + + +def test_scatter_2d_dim1(): + + dim0 = 101 + dim1 = 131 + x = np.random.rand(dim0, dim1) + + # for dim = 0 + + import random + + indices = list(range(dim1)) + random.shuffle(indices) + indices = np.array(indices[:50]).reshape((25, 2)) + updates = np.random.rand(indices.shape[0], 2) + + torch_out = torch.scatter(torch.from_numpy(x), 1, torch.from_numpy(indices), torch.from_numpy(updates)) + paddle_out = paddle.scatter(paddle.from_numpy(x), 1, paddle.from_numpy(indices), paddle.from_numpy(updates)) + + paddle_out = paddle_out.numpy() + torch_out = torch_out.numpy() + + assert np.allclose(paddle_out, torch_out) + + +def test_scatter_nd_dimm(): + import random, math + + for rank in range(1, 6): + for test in range(10): + dim = random.randint(0, rank-1) + + shape = [random.randint(5, 10) for _ in range(rank)] + + indice_shape = [random.randint(5, 10) for _ in range(rank)] + indice_shape = [min(shape[i], indice_shape[i]) for i in range(rank)] + indice_numel = math.prod(indice_shape) + + shape[dim] = 2 * indice_numel + + x = np.random.rand(*shape) + + indice_value = list(range(shape[dim])) + random.shuffle(indice_value) + + indices = np.array(indice_value[:indice_numel]).reshape(indice_shape) + updates = np.random.rand(*indice_shape) + + torch_out = torch.scatter(torch.from_numpy(x), dim, torch.from_numpy(indices), torch.from_numpy(updates)) + paddle_out = paddle.scatter(paddle.from_numpy(x), dim, paddle.from_numpy(indices), paddle.from_numpy(updates)) + + paddle_out = paddle_out.numpy() + torch_out = torch_out.numpy() + + assert np.allclose(paddle_out, torch_out) + +def test_scatter_add_1d(): + + x = np.random.rand(100) + + indices = np.random.randint(low=0, high=100, size=50) + updates = np.random.rand(50) + + paddle_out = paddle.scatter_add(paddle.from_numpy(x), 0, paddle.from_numpy(indices), paddle.from_numpy(updates)) + torch_out = torch.scatter_add(torch.from_numpy(x), 0, torch.from_numpy(indices), 
torch.from_numpy(updates))
+
+    paddle_out = paddle_out.numpy()
+    torch_out = torch_out.numpy()
+
+    assert np.all(paddle_out == torch_out)
+
+def test_scatter_add_nd_dimm():
+    import random, math
+
+    for rank in range(1, 6):
+        for test in range(10):
+            dim = random.randint(0, rank-1)
+
+            shape = [random.randint(5, 10) for _ in range(rank)]
+
+            indice_shape = [random.randint(5, 10) for _ in range(rank)]
+            indice_shape = [min(shape[i], indice_shape[i]) for i in range(rank)]
+            indice_numel = math.prod(indice_shape)
+
+            shape[dim] = 2 * indice_numel
+
+            x = np.random.rand(*shape)
+
+
+            indice_value = list(range(shape[dim]))
+            random.shuffle(indice_value)
+
+            indices = np.array(indice_value[:indice_numel]).reshape(indice_shape)
+
+            # indices = np.random.randint(0, shape[dim], size=indice_shape)
+            updates = np.random.rand(*indice_shape)
+
+            torch_out = torch.scatter_add(torch.from_numpy(x), dim, torch.from_numpy(indices), torch.from_numpy(updates))
+            paddle_out = paddle.scatter_add(paddle.from_numpy(x), dim, paddle.from_numpy(indices), paddle.from_numpy(updates))
+
+            paddle_out = paddle_out.numpy()
+            torch_out = torch_out.numpy()
+
+            assert np.allclose(paddle_out, torch_out)
\ No newline at end of file
diff --git a/RE/paddlemetric/.gitignore b/RE/paddlemetric/.gitignore
new file mode 100644
index 00000000..2f78cf5b
--- /dev/null
+++ b/RE/paddlemetric/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+
diff --git a/RE/paddlemetric/.ignore b/RE/paddlemetric/.ignore
new file mode 100644
index 00000000..0d20b648
--- /dev/null
+++ b/RE/paddlemetric/.ignore
@@ -0,0 +1 @@
+*.pyc
diff --git a/RE/paddlemetric/CHANGELOG.md b/RE/paddlemetric/CHANGELOG.md
new file mode 100644
index 00000000..a843ec77
--- /dev/null
+++ b/RE/paddlemetric/CHANGELOG.md
@@ -0,0 +1,20 @@
+Changelog
+===
+All notable changes to this project are recorded here. The format is based on [Keep a Changelog].
+
+This project's versioning follows [Semantic Versioning] and [PEP-440].
+
+## [v1.0] - 2022-07-04
+---
+### Added
+- Support the testing of some classification modules
+### Changed
+
+
+
+
+[v1.0]: https://console.cloud.baidu-int.com/devops/icode/repos/baidu/ccl/torch2paddle/commits/7476c4f8477d6161f8d5aaaf78f47d6bee990d42
+
+[Keep a Changelog]: https://keepachangelog.com/zh-CN/1.0.0/
+[Semantic Versioning]: https://semver.org/lang/zh-CN/
+[PEP-440]: https://www.python.org/dev/peps/pep-0440/
diff --git a/RE/paddlemetric/src/README.md b/RE/paddlemetric/src/README.md
new file mode 100644
index 00000000..1b8004fc
--- /dev/null
+++ b/RE/paddlemetric/src/README.md
@@ -0,0 +1,28 @@
+# Paddle Metrics
+
+Metrics library for paddle, ported from torchmetrics
+## Install
+
+pip install http://public.bcc-bdbl.baidu.com:8000/Package/paddlemetrics-1.0.0b0-py3-none-any.whl
+
+## Document
+
+### Requirements
+
+* paddle-extension (the paddleext package in this repo)
+
+### Progress
+
+Testing progress:
+
+### Classification
+
+* classification/test_accuracy.py
+* classification/test_f_beta.py
+* classification/test_precision_recall.py
+* classification/test_stat_scores.py
+
+### functional
+
+* functional/test_classification.py
+
diff --git a/RE/paddlemetric/src/paddlemetrics/__about__.py b/RE/paddlemetric/src/paddlemetrics/__about__.py
new file mode 100644
index 00000000..53a9cfa4
--- /dev/null
+++ b/RE/paddlemetric/src/paddlemetrics/__about__.py
@@ -0,0 +1,27 @@
+__version__ = "0.6.0dev"
+__author__ = "PyTorchLightning et al."
+__author_email__ = "name@pytorchlightning.ai"
+__license__ = "Apache-2.0"
+__copyright__ = f"Copyright (c) 2020-2021, {__author__}."
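# Editor's note (illustrative sketch, not part of the original patch): typical
# usage mirrors torchmetrics, with paddleext.torchapi standing in for torch:
#
#     import paddleext.torchapi as B
#     from paddlemetrics import Accuracy
#
#     metric = Accuracy()
#     preds = B.tensor([0, 2, 1, 3])
#     target = B.tensor([0, 1, 2, 3])
#     print(metric(preds, target))   # expected accuracy: 0.5 (2 of 4 correct)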
+__homepage__ = "https://github.com/PyTorchLightning/metrics" +__docs__ = "PyTorch native Metrics" +__docs_url__ = "https://paddlemetrics.readthedocs.io/en/stable/" +__long_doc__ = """ +paddlemetrics is a metrics API created for easy metric development and usage in both PyTorch and +[PyTorch Lightning](https://pytorch-lightning.readthedocs.io/en/stable/). It was originally a part of +Pytorch Lightning, but got split off so users could take advantage of the large collection of metrics +implemented without having to install Pytorch Lightning (even though we would love for you to try it out). +We currently have around 25+ metrics implemented and we continuously is adding more metrics, both within +already covered domains (classification, regression ect.) but also new domains (object detection ect.). +We make sure that all our metrics are rigorously tested such that you can trust them. +""" + +__all__ = [ + "__author__", + "__author_email__", + "__copyright__", + "__docs__", + "__homepage__", + "__license__", + "__version__", +] diff --git a/RE/paddlemetric/src/paddlemetrics/__init__.py b/RE/paddlemetric/src/paddlemetrics/__init__.py new file mode 100644 index 00000000..ea557086 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/__init__.py @@ -0,0 +1,143 @@ +r"""Root package info.""" +import logging as __logging +import os +import sys + +from paddlemetrics.__about__ import * # noqa: F401, F403 + +_logger = __logging.getLogger("paddlemetrics") +_logger.addHandler(__logging.StreamHandler()) +_logger.setLevel(__logging.INFO) + +_PACKAGE_ROOT = os.path.dirname(__file__) +_PROJECT_ROOT = os.path.dirname(_PACKAGE_ROOT) + +from paddlemetrics import functional # noqa: E402 +from paddlemetrics.aggregation import CatMetric, MaxMetric, MeanMetric, MinMetric, SumMetric # noqa: E402 +from paddlemetrics.audio import PESQ, PIT, SI_SDR, SI_SNR, SNR, STOI # noqa: E402 +from paddlemetrics.classification import ( # noqa: E402 + AUC, + AUROC, + F1, + ROC, + Accuracy, + AveragePrecision, + BinnedAveragePrecision, + BinnedPrecisionRecallCurve, + BinnedRecallAtFixedPrecision, + CalibrationError, + CohenKappa, + ConfusionMatrix, + FBeta, + HammingDistance, + Hinge, + IoU, + KLDivergence, + MatthewsCorrcoef, + Precision, + PrecisionRecallCurve, + Recall, + Specificity, + StatScores, +) +from paddlemetrics.collections import MetricCollection # noqa: E402 +#from paddlemetrics.image import FID, IS, KID, LPIPS, PSNR, SSIM # noqa: E402 +from paddlemetrics.metric import Metric # noqa: E402 +from paddlemetrics.regression import ( # noqa: E402 + CosineSimilarity, + ExplainedVariance, + MeanAbsoluteError, + MeanAbsolutePercentageError, + MeanSquaredError, + MeanSquaredLogError, + PearsonCorrcoef, + R2Score, + SpearmanCorrcoef, + SymmetricMeanAbsolutePercentageError, + TweedieDevianceScore, +) +from paddlemetrics.retrieval import ( # noqa: E402 + RetrievalFallOut, + RetrievalHitRate, + RetrievalMAP, + RetrievalMRR, + RetrievalNormalizedDCG, + RetrievalPrecision, + RetrievalRecall, + RetrievalRPrecision, +) +from paddlemetrics.text import WER, BLEUScore, ROUGEScore, SacreBLEUScore # noqa: E402 BERTScore, +from paddlemetrics.wrappers import BootStrapper, MetricTracker, MultioutputWrapper # noqa: E402 + +__all__ = [ + "functional", + "Accuracy", + "AUC", + "AUROC", + "AveragePrecision", + "BinnedAveragePrecision", + "BinnedPrecisionRecallCurve", + "BinnedRecallAtFixedPrecision", +# "BERTScore", + "BLEUScore", + "BootStrapper", + "CalibrationError", + "CatMetric", + "CohenKappa", + "ConfusionMatrix", + "CosineSimilarity", + 
"TweedieDevianceScore", + "ExplainedVariance", + "F1", + "FBeta", +# "FID", + "HammingDistance", + "Hinge", + "IoU", +# "IS", +# "KID", + "KLDivergence", +# "LPIPS", + "MatthewsCorrcoef", + "MaxMetric", + "MeanAbsoluteError", + "MeanAbsolutePercentageError", + "MeanMetric", + "MeanSquaredError", + "MeanSquaredLogError", + "Metric", + "MetricCollection", + "MetricTracker", + "MinMetric", + "MultioutputWrapper", + "PearsonCorrcoef", + "PESQ", + "PIT", + "Precision", + "PrecisionRecallCurve", +# "PSNR", + "R2Score", + "Recall", + "RetrievalFallOut", + "RetrievalHitRate", + "RetrievalMAP", + "RetrievalMRR", + "RetrievalNormalizedDCG", + "RetrievalPrecision", + "RetrievalRecall", + "RetrievalRPrecision", + "ROC", + "ROUGEScore", + "SacreBLEUScore", + "SI_SDR", + "SI_SNR", + "SNR", + "SpearmanCorrcoef", + "Specificity", +# "SSIM", + "StatScores", + "STOI", + "SumMetric", + "SymmetricMeanAbsolutePercentageError", + "WER", +] diff --git a/RE/paddlemetric/src/paddlemetrics/aggregation.py b/RE/paddlemetric/src/paddlemetrics/aggregation.py new file mode 100644 index 00000000..a95c51c0 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/aggregation.py @@ -0,0 +1,445 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import warnings +from typing import Any, Callable, List, Optional, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.metric import Metric +from paddlemetrics.utilities.data import dim_zero_cat + + +class BaseAggregator(Metric): + """Base class for aggregation metrics. + + Args: + fn: string specifying the reduction function + default_value: default tensor value to use for the metric state + nan_strategy: options: + - ``'error'``: if any `nan` values are encounted will give a RuntimeError + - ``'warn'``: if any `nan` values are encounted will give a warning and continue + - ``'ignore'``: all `nan` values are silently removed + - a float: if a float is provided will impude any `nan` values with this value + + compute_on_step: + Forward only calls ``update()`` and returns None if this is + set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. + default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. + When `None`, DDP will be used to perform the allgather. 
+ + Raises: + ValueError: + If ``nan_strategy`` is not one of ``error``, ``warn``, ``ignore`` or a float + """ + + value: Tensor + is_differentiable = None + higher_is_better = None + + def __init__( + self, + fn: Union[Callable, str], + default_value: Union[Tensor, List], + nan_strategy: Union[str, float] = "error", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ): + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + allowed_nan_strategy = ("error", "warn", "ignore") + if nan_strategy not in allowed_nan_strategy and not isinstance(nan_strategy, float): + raise ValueError( + f"Arg `nan_strategy` should either be a float or one of {allowed_nan_strategy}" + f" but got {nan_strategy}." + ) + + self.nan_strategy = nan_strategy + self.add_state("value", default=default_value, dist_reduce_fx=fn) + + def _cast_and_nan_check_input(self, x: Union[float, Tensor]) -> Tensor: + """Converts input x to a tensor if not already and afterwards checks for nans that either give an error, + warning or just ignored.""" + if not isinstance(x, Tensor): + x = B.as_tensor(x, dtype=B.float32, device=self.device) + + nans = B.isnan(x) + if any(nans.flatten()): + if self.nan_strategy == "error": + raise RuntimeError("Encounted `nan` values in tensor") + if self.nan_strategy == "warn": + warnings.warn("Encounted `nan` values in tensor. Will be removed.", UserWarning) + x = x[~nans] + elif self.nan_strategy == "ignore": + x = x[~nans] + else: + x[nans] = self.nan_strategy + + return x.float() + + def update(self, value: Union[float, Tensor]) -> None: # type: ignore + """Overwrite in child class.""" + pass + + def compute(self) -> Tensor: + """Compute the aggregated value.""" + return self.value.squeeze() if isinstance(self.value, Tensor) else self.value + + +class MaxMetric(BaseAggregator): + """Aggregate a stream of value into their maximum value. + + Args: + nan_strategy: options: + - ``'error'``: if any `nan` values are encounted will give a RuntimeError + - ``'warn'``: if any `nan` values are encounted will give a warning and continue + - ``'ignore'``: all `nan` values are silently removed + - a float: if a float is provided will impude any `nan` values with this value + + compute_on_step: + Forward only calls ``update()`` and returns None if this is + set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. + default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. + When `None`, DDP will be used to perform the allgather. + + Raises: + ValueError: + If ``nan_strategy`` is not one of ``error``, ``warn``, ``ignore`` or a float + + Example: + >>> from paddlemetrics import MaxMetric + >>> metric = MaxMetric() + >>> metric.update(1) + >>> metric.update(B.tensor([2, 3])) + >>> metric.compute() + tensor(3.) 
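To make the ``nan_strategy`` options documented above concrete, a small sketch using ``SumMetric`` (defined later in this file); the input values are made up:

```python
# Illustration of the documented nan_strategy behaviours (hypothetical values).
import paddleext.torchapi as B
from paddlemetrics import SumMetric

x = B.tensor([1.0, float("nan"), 2.0])

m_ignore = SumMetric(nan_strategy="ignore")  # nan entries are silently dropped
m_ignore.update(x)
print(m_ignore.compute())                    # tensor(3.)

m_impute = SumMetric(nan_strategy=10.0)      # nan entries are imputed with 10.0
m_impute.update(x)
print(m_impute.compute())                    # tensor(13.)

# SumMetric(nan_strategy="error") would raise a RuntimeError on the same input.
```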
+ """ + + def __init__( + self, + nan_strategy: Union[str, float] = "warn", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ): + super().__init__( + "max", + -B.tensor(float("inf")), + nan_strategy, + compute_on_step, + dist_sync_on_step, + process_group, + dist_sync_fn, + ) + + def update(self, value: Union[float, Tensor]) -> None: # type: ignore + """Update state with data. + + Args: + value: Either a float or tensor containing data. Additional tensor + dimensions will be flattened + """ + value = self._cast_and_nan_check_input(value) + if any(value.flatten()): # make sure tensor not empty + self.value = B.max(self.value, B.max(value)) + + +class MinMetric(BaseAggregator): + """Aggregate a stream of value into their minimum value. + + Args: + nan_strategy: options: + - ``'error'``: if any `nan` values are encounted will give a RuntimeError + - ``'warn'``: if any `nan` values are encounted will give a warning and continue + - ``'ignore'``: all `nan` values are silently removed + - a float: if a float is provided will impude any `nan` values with this value + + compute_on_step: + Forward only calls ``update()`` and returns None if this is + set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. + default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. + When `None`, DDP will be used to perform the allgather. + + Raises: + ValueError: + If ``nan_strategy`` is not one of ``error``, ``warn``, ``ignore`` or a float + + Example: + >>> from paddlemetrics import MinMetric + >>> metric = MinMetric() + >>> metric.update(1) + >>> metric.update(B.tensor([2, 3])) + >>> metric.compute() + tensor(1.) + """ + + def __init__( + self, + nan_strategy: Union[str, float] = "warn", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ): + super().__init__( + "min", + B.tensor(float("inf")), + nan_strategy, + compute_on_step, + dist_sync_on_step, + process_group, + dist_sync_fn, + ) + + def update(self, value: Union[float, Tensor]) -> None: # type: ignore + """Update state with data. + + Args: + value: Either a float or tensor containing data. Additional tensor + dimensions will be flattened + """ + value = self._cast_and_nan_check_input(value) + if any(value.flatten()): # make sure tensor not empty + self.value = B.min(self.value, B.min(value)) + + +class SumMetric(BaseAggregator): + """Aggregate a stream of value into their sum. + + Args: + nan_strategy: options: + - ``'error'``: if any `nan` values are encounted will give a RuntimeError + - ``'warn'``: if any `nan` values are encounted will give a warning and continue + - ``'ignore'``: all `nan` values are silently removed + - a float: if a float is provided will impude any `nan` values with this value + + compute_on_step: + Forward only calls ``update()`` and returns None if this is + set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. 
+ default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. + When `None`, DDP will be used to perform the allgather. + + Raises: + ValueError: + If ``nan_strategy`` is not one of ``error``, ``warn``, ``ignore`` or a float + + Example: + >>> from paddlemetrics import SumMetric + >>> metric = SumMetric() + >>> metric.update(1) + >>> metric.update(B.tensor([2, 3])) + >>> metric.compute() + tensor(6.) + """ + + def __init__( + self, + nan_strategy: Union[str, float] = "warn", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ): + super().__init__( + "sum", B.zeros(1), nan_strategy, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn + ) + + def update(self, value: Union[float, Tensor]) -> None: # type: ignore + """Update state with data. + + Args: + value: Either a float or tensor containing data. Additional tensor + dimensions will be flattened + """ + value = self._cast_and_nan_check_input(value) + self.value += value.sum() + + +class CatMetric(BaseAggregator): + """Concatenate a stream of values. + + Args: + nan_strategy: options: + - ``'error'``: if any `nan` values are encounted will give a RuntimeError + - ``'warn'``: if any `nan` values are encounted will give a warning and continue + - ``'ignore'``: all `nan` values are silently removed + - a float: if a float is provided will impude any `nan` values with this value + + compute_on_step: + Forward only calls ``update()`` and returns None if this is + set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. + default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. + When `None`, DDP will be used to perform the allgather. + + Raises: + ValueError: + If ``nan_strategy`` is not one of ``error``, ``warn``, ``ignore`` or a float + + Example: + >>> from paddlemetrics import CatMetric + >>> metric = CatMetric() + >>> metric.update(1) + >>> metric.update(B.tensor([2, 3])) + >>> metric.compute() + tensor([1., 2., 3.]) + """ + + def __init__( + self, + nan_strategy: Union[str, float] = "warn", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ): + super().__init__("cat", [], nan_strategy, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn) + + def update(self, value: Union[float, Tensor]) -> None: # type: ignore + """Update state with data. + + Args: + value: Either a float or tensor containing data. Additional tensor + dimensions will be flattened + """ + value = self._cast_and_nan_check_input(value) + if any(value.flatten()): + self.value.append(value) + + def compute(self) -> Tensor: + """Compute the aggregated value.""" + if isinstance(self.value, list) and self.value: + return dim_zero_cat(self.value) + return self.value + + +class MeanMetric(BaseAggregator): + """Aggregate a stream of value into their mean value. 
+ + Args: + nan_strategy: options: + - ``'error'``: if any `nan` values are encounted will give a RuntimeError + - ``'warn'``: if any `nan` values are encounted will give a warning and continue + - ``'ignore'``: all `nan` values are silently removed + - a float: if a float is provided will impude any `nan` values with this value + + compute_on_step: + Forward only calls ``update()`` and returns None if this is + set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. + default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. + When `None`, DDP will be used to perform the allgather. + + Raises: + ValueError: + If ``nan_strategy`` is not one of ``error``, ``warn``, ``ignore`` or a float + + Example: + >>> from paddlemetrics import MeanMetric + >>> metric = MeanMetric() + >>> metric.update(1) + >>> metric.update(B.tensor([2, 3])) + >>> metric.compute() + tensor([2.]) + """ + + def __init__( + self, + nan_strategy: Union[str, float] = "warn", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ): + super().__init__( + "sum", B.zeros(1), nan_strategy, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn + ) + self.add_state("weight", default=B.zeros(1), dist_reduce_fx="sum") + + def update(self, value: Union[float, Tensor], weight: Union[float, Tensor] = 1.0) -> None: # type: ignore + """Update state with data. + + Args: + value: Either a float or tensor containing data. Additional tensor + dimensions will be flattened + weight: Either a float or tensor containing weights for calculating + the average. Shape of weight should be able to broadcast with + the shape of `value`. Default to `1.0` corresponding to simple + harmonic average. + """ + value = self._cast_and_nan_check_input(value) + weight = self._cast_and_nan_check_input(weight) + + # broadcast weight to values shape + if not hasattr(B, "broadcast_to"): + if weight.shape == (): + weight = B.ones_like(value) * weight + if weight.shape != value.shape: + raise ValueError("Broadcasting not supported on PyTorch <1.8") + else: + weight = B.broadcast_to(weight, value.shape) + + self.value += (value * weight).sum() + self.weight += weight.sum() + + def compute(self) -> Tensor: + """Compute the aggregated value.""" + return self.value / self.weight diff --git a/RE/paddlemetric/src/paddlemetrics/audio/__init__.py b/RE/paddlemetric/src/paddlemetrics/audio/__init__.py new file mode 100644 index 00000000..efd0b451 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/audio/__init__.py @@ -0,0 +1,19 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
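Before moving on to the audio metrics, a hand-checkable sketch of the weighted averaging that ``MeanMetric`` describes above (numbers are hypothetical):

```python
# Weighted mean as described above: sum(value * weight) / sum(weight).
import paddleext.torchapi as B
from paddlemetrics import MeanMetric

m = MeanMetric()
m.update(B.tensor([2.0, 4.0]), weight=B.tensor([1.0, 3.0]))
print(m.compute())  # (2*1 + 4*3) / (1 + 3) = tensor([3.5000])
```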
+from paddlemetrics.audio.pesq import PESQ # noqa: F401 +from paddlemetrics.audio.pit import PIT # noqa: F401 +from paddlemetrics.audio.si_sdr import SI_SDR # noqa: F401 +from paddlemetrics.audio.si_snr import SI_SNR # noqa: F401 +from paddlemetrics.audio.snr import SNR # noqa: F401 +from paddlemetrics.audio.stoi import STOI # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/audio/pesq.py b/RE/paddlemetric/src/paddlemetrics/audio/pesq.py new file mode 100644 index 00000000..d45fab53 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/audio/pesq.py @@ -0,0 +1,130 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.audio.pesq import pesq +from paddlemetrics.metric import Metric +from paddlemetrics.utilities.imports import _PESQ_AVAILABLE + + +class PESQ(Metric): + """PESQ (Perceptual Evaluation of Speech Quality) + + This is a wrapper for the pesq package [1]. . Note that input will be moved to `cpu` + to perform the metric calculation. + + .. note:: using this metrics requires you to have ``pesq`` install. Either install as ``pip install + paddlemetrics[audio]`` or ``pip install pesq`` + + Forward accepts + + - ``preds``: ``shape [...,time]`` + - ``target``: ``shape [...,time]`` + + Args: + fs: + sampling frequency, should be 16000 or 8000 (Hz) + mode: + 'wb' (wide-band) or 'nb' (narrow-band) + keep_same_device: + whether to move the pesq value to the device of preds + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. 
When ``None``, DDP + will be used to perform the allgather + + Raises: + ValueError: + If ``peqs`` package is not installed + ValueError: + If ``fs`` is not either ``8000`` or ``16000`` + ValueError: + If ``mode`` is not either ``"wb"`` or ``"nb"`` + + Example: + >>> from paddlemetrics.audio import PESQ + >>> import torchapi as B + >>> g = B.manual_seed(1) + >>> preds = B.randn(8000) + >>> target = B.randn(8000) + >>> nb_pesq = PESQ(8000, 'nb') + >>> nb_pesq(preds, target) + tensor(2.2076) + >>> wb_pesq = PESQ(16000, 'wb') + >>> wb_pesq(preds, target) + tensor(1.7359) + + References: + [1] https://github.com/ludlows/python-pesq + """ + + sum_pesq: Tensor + total: Tensor + is_differentiable = False + higher_is_better = True + + def __init__( + self, + fs: int, + mode: str, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable[[Tensor], Tensor]] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + if not _PESQ_AVAILABLE: + raise ValueError( + "PESQ metric requires that pesq is installed." + "Either install as `pip install paddlemetrics[audio]` or `pip install pesq`" + ) + if fs not in (8000, 16000): + raise ValueError(f"Expected argument `fs` to either be 8000 or 16000 but got {fs}") + self.fs = fs + if mode not in ("wb", "nb"): + raise ValueError(f"Expected argument `mode` to either be 'wb' or 'nb' but got {mode}") + self.mode = mode + + self.add_state("sum_pesq", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + pesq_batch = pesq(preds, target, self.fs, self.mode, False).to(self.sum_pesq.device) + + self.sum_pesq += pesq_batch.sum() + self.total += pesq_batch.numel() + + def compute(self) -> Tensor: + """Computes average PESQ.""" + return self.sum_pesq / self.total diff --git a/RE/paddlemetric/src/paddlemetrics/audio/pit.py b/RE/paddlemetric/src/paddlemetrics/audio/pit.py new file mode 100644 index 00000000..9d9dc757 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/audio/pit.py @@ -0,0 +1,113 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Dict, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.audio.pit import pit +from paddlemetrics.metric import Metric + + +class PIT(Metric): + """Permutation invariant training (PIT). The PIT implements the famous Permutation Invariant Training method. + + [1] in speech separation field in order to calculate audio metrics in a permutation invariant way. 
+ + Forward accepts + + - ``preds``: ``shape [batch, spk, ...]`` + - ``target``: ``shape [batch, spk, ...]`` + + Args: + metric_func: + a metric function accept a batch of target and estimate, i.e. metric_func(preds[:, i, ...], + target[:, j, ...]), and returns a batch of metric tensors [batch] + eval_func: + the function to find the best permutation, can be 'min' or 'max', i.e. the smaller the better + or the larger the better. + compute_on_step: + Forward only calls ``update()`` and returns None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. + kwargs: + additional args for metric_func + + Returns: + average PIT metric + + Example: + >>> import torchapi as B + >>> from paddlemetrics import PIT + >>> from paddlemetrics.functional import si_snr + >>> _ = B.manual_seed(42) + >>> preds = B.randn(3, 2, 5) # [batch, spk, time] + >>> target = B.randn(3, 2, 5) # [batch, spk, time] + >>> pit = PIT(si_snr, 'max') + >>> pit(preds, target) + tensor(-2.1065) + + Reference: + [1] D. Yu, M. Kolbaek, Z.-H. Tan, J. Jensen, Permutation invariant training of deep models for + speaker-independent multi-talker speech separation, in: 2017 IEEE Int. Conf. Acoust. Speech + Signal Process. ICASSP, IEEE, New Orleans, LA, 2017: pp. 241–245. https://doi.org/10.1109/ICASSP.2017.7952154. + """ + + is_differentiable = True + sum_pit_metric: Tensor + total: Tensor + + def __init__( + self, + metric_func: Callable, + eval_func: str = "max", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable[[Tensor], Tensor]] = None, + **kwargs: Dict[str, Any], + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + self.metric_func = metric_func + self.eval_func = eval_func + self.kwargs = kwargs + + self.add_state("sum_pit_metric", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + pit_metric = pit(preds, target, self.metric_func, self.eval_func, **self.kwargs)[0] + + self.sum_pit_metric += pit_metric.sum() + self.total += pit_metric.numel() + + def compute(self) -> Tensor: + """Computes average PIT metric.""" + return self.sum_pit_metric / self.total diff --git a/RE/paddlemetric/src/paddlemetrics/audio/si_sdr.py b/RE/paddlemetric/src/paddlemetrics/audio/si_sdr.py new file mode 100644 index 00000000..f6a46378 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/audio/si_sdr.py @@ -0,0 +1,105 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
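Complementing the ``PIT`` description above, a sketch of plugging in a custom ``metric_func``; the ``neg_mse`` helper below is hypothetical and only illustrates the required ``[batch]``-shaped return value:

```python
# PIT with a hypothetical pairwise metric; metric_func receives preds[:, i, ...]
# and target[:, j, ...] and must return one score per batch element.
import paddleext.torchapi as B
from paddlemetrics import PIT


def neg_mse(preds, target):
    return -((preds - target) ** 2).mean(-1)  # shape [batch]


_ = B.manual_seed(42)
preds = B.randn(3, 2, 5)   # [batch, spk, time]
target = B.randn(3, 2, 5)  # [batch, spk, time]
pit = PIT(neg_mse, "max")  # "max": the least negative MSE permutation wins
print(pit(preds, target))
```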
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.audio.si_sdr import si_sdr +from paddlemetrics.metric import Metric + + +class SI_SDR(Metric): + """Scale-invariant signal-to-distortion ratio (SI-SDR). The SI-SDR value is in general considered an overall + measure of how good a source sound. + + Forward accepts + + - ``preds``: ``shape [...,time]`` + - ``target``: ``shape [...,time]`` + + Args: + zero_mean: + if to zero mean target and preds or not + compute_on_step: + Forward only calls ``update()`` and returns None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. + + Raises: + TypeError: + if target and preds have a different shape + + Returns: + average si-sdr value + + Example: + >>> import torchapi as B + >>> from paddlemetrics import SI_SDR + >>> target = B.tensor([3.0, -0.5, 2.0, 7.0]) + >>> preds = B.tensor([2.5, 0.0, 2.0, 8.0]) + >>> si_sdr = SI_SDR() + >>> si_sdr_val = si_sdr(preds, target) + >>> si_sdr_val + tensor(18.4030) + + References: + [1] Le Roux, Jonathan, et al. "SDR half-baked or well done." IEEE International Conference on Acoustics, Speech + and Signal Processing (ICASSP) 2019. + """ + + is_differentiable = True + higher_is_better = True + sum_si_sdr: Tensor + total: Tensor + + def __init__( + self, + zero_mean: bool = False, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable[[Tensor], Tensor]] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + self.zero_mean = zero_mean + + self.add_state("sum_si_sdr", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + si_sdr_batch = si_sdr(preds=preds, target=target, zero_mean=self.zero_mean) + + self.sum_si_sdr += si_sdr_batch.sum() + self.total += si_sdr_batch.numel() + + def compute(self) -> Tensor: + """Computes average SI-SDR.""" + return self.sum_si_sdr / self.total diff --git a/RE/paddlemetric/src/paddlemetrics/audio/si_snr.py b/RE/paddlemetric/src/paddlemetrics/audio/si_snr.py new file mode 100644 index 00000000..31747a28 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/audio/si_snr.py @@ -0,0 +1,101 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.audio.si_snr import si_snr +from paddlemetrics.metric import Metric + + +class SI_SNR(Metric): + """Scale-invariant signal-to-noise ratio (SI-SNR). + + Forward accepts + + - ``preds``: ``shape [...,time]`` + - ``target``: ``shape [...,time]`` + + Args: + compute_on_step: + Forward only calls ``update()`` and returns None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. + + Raises: + TypeError: + if target and preds have a different shape + + Returns: + average si-snr value + + Example: + >>> import torchapi as B + >>> from paddlemetrics import SI_SNR + >>> target = B.tensor([3.0, -0.5, 2.0, 7.0]) + >>> preds = B.tensor([2.5, 0.0, 2.0, 8.0]) + >>> si_snr = SI_SNR() + >>> si_snr_val = si_snr(preds, target) + >>> si_snr_val + tensor(15.0918) + + References: + [1] Y. Luo and N. Mesgarani, "TaSNet: Time-Domain Audio Separation Network for Real-Time, Single-Channel Speech + Separation," 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2018, pp. + 696-700, doi: 10.1109/ICASSP.2018.8462116. + """ + + is_differentiable = True + sum_si_snr: Tensor + total: Tensor + higher_is_better = True + + def __init__( + self, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable[[Tensor], Tensor]] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.add_state("sum_si_snr", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + si_snr_batch = si_snr(preds=preds, target=target) + + self.sum_si_snr += si_snr_batch.sum() + self.total += si_snr_batch.numel() + + def compute(self) -> Tensor: + """Computes average SI-SNR.""" + return self.sum_si_snr / self.total diff --git a/RE/paddlemetric/src/paddlemetrics/audio/snr.py b/RE/paddlemetric/src/paddlemetrics/audio/snr.py new file mode 100644 index 00000000..683cb8bf --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/audio/snr.py @@ -0,0 +1,110 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.audio.snr import snr +from paddlemetrics.metric import Metric + + +class SNR(Metric): + r"""Signal-to-noise ratio (SNR_): + + .. math:: + \text{SNR} = \frac{P_{signal}}{P_{noise}} + + where :math:`P` denotes the power of each signal. The SNR metric compares the level + of the desired signal to the level of background noise. Therefore, a high value of + SNR means that the audio is clear. + + Forward accepts + + - ``preds``: ``shape [..., time]`` + - ``target``: ``shape [..., time]`` + + Args: + zero_mean: + if to zero mean target and preds or not + compute_on_step: + Forward only calls ``update()`` and returns None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. + + Raises: + TypeError: + if target and preds have a different shape + + Returns: + average snr value + + Example: + >>> import torchapi as B + >>> from paddlemetrics import SNR + >>> target = B.tensor([3.0, -0.5, 2.0, 7.0]) + >>> preds = B.tensor([2.5, 0.0, 2.0, 8.0]) + >>> snr = SNR() + >>> snr_val = snr(preds, target) + >>> snr_val + tensor(16.1805) + + References: + [1] Le Roux, Jonathan, et al. "SDR half-baked or well done." IEEE International Conference on Acoustics, Speech + and Signal Processing (ICASSP) 2019. + + """ + is_differentiable = True + sum_snr: Tensor + total: Tensor + + def __init__( + self, + zero_mean: bool = False, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable[[Tensor], Tensor]] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + self.zero_mean = zero_mean + + self.add_state("sum_snr", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + snr_batch = snr(preds=preds, target=target, zero_mean=self.zero_mean) + + self.sum_snr += snr_batch.sum() + self.total += snr_batch.numel() + + def compute(self) -> Tensor: + """Computes average SNR.""" + return self.sum_snr / self.total diff --git a/RE/paddlemetric/src/paddlemetrics/audio/stoi.py b/RE/paddlemetric/src/paddlemetrics/audio/stoi.py new file mode 100644 index 00000000..1c8cf378 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/audio/stoi.py @@ -0,0 +1,133 @@ +# Copyright The PyTorch Lightning team. 
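As a sanity check on the SNR definition above, the docstring example value can be reproduced by hand, assuming the underlying functional reports the ratio in decibels:

```python
# Hand computation of the SNR docstring example; the 10*log10 (decibel) form is
# an assumption about the underlying functional implementation.
import numpy as np

target = np.array([3.0, -0.5, 2.0, 7.0])
preds = np.array([2.5, 0.0, 2.0, 8.0])
noise = preds - target

snr_db = 10 * np.log10((target ** 2).sum() / (noise ** 2).sum())
print(round(float(snr_db), 4))  # 16.1805, matching the example above
```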
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.audio.stoi import stoi +from paddlemetrics.metric import Metric +from paddlemetrics.utilities.imports import _PYSTOI_AVAILABLE + + +class STOI(Metric): + r"""STOI (Short Term Objective Intelligibility, see [2,3]), a wrapper for the pystoi package [1]. + Note that input will be moved to `cpu` to perform the metric calculation. + + Intelligibility measure which is highly correlated with the intelligibility of degraded speech signals, e.g., due + to additive noise, single/multi-channel noise reduction, binary masking and vocoded speech as in CI simulations. + The STOI-measure is intrusive, i.e., a function of the clean and degraded speech signals. STOI may be a good + alternative to the speech intelligibility index (SII) or the speech transmission index (STI), when you are + interested in the effect of nonlinear processing to noisy speech, e.g., noise reduction, binary masking algorithms, + on speech intelligibility. Description taken from [Cees Taal's website](http://www.ceestaal.nl/code/). + + .. note:: using this metrics requires you to have ``pystoi`` install. Either install as ``pip install + paddlemetrics[audio]`` or ``pip install pystoi`` + + Forward accepts + + - ``preds``: ``shape [...,time]`` + - ``target``: ``shape [...,time]`` + + Args: + fs: + sampling frequency (Hz) + extended: + whether to use the extended STOI described in [4] + compute_on_step: + Forward only calls ``update()`` and returns None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. + + Returns: + average STOI value + + Raises: + ModuleNotFoundError: + If ``pystoi`` package is not installed + + Example: + >>> from paddlemetrics.audio import STOI + >>> import torchapi as B + >>> g = B.manual_seed(1) + >>> preds = B.randn(8000) + >>> target = B.randn(8000) + >>> stoi = STOI(8000, False) + >>> stoi(preds, target) + tensor(-0.0100) + + References: + [1] https://github.com/mpariente/pystoi + + [2] C.H.Taal, R.C.Hendriks, R.Heusdens, J.Jensen 'A Short-Time Objective Intelligibility Measure for + Time-Frequency Weighted Noisy Speech', ICASSP 2010, Texas, Dallas. + + [3] C.H.Taal, R.C.Hendriks, R.Heusdens, J.Jensen 'An Algorithm for Intelligibility Prediction of + Time-Frequency Weighted Noisy Speech', IEEE Transactions on Audio, Speech, and Language Processing, 2011. + + [4] J. Jensen and C. H. Taal, 'An Algorithm for Predicting the Intelligibility of Speech Masked by Modulated + Noise Maskers', IEEE Transactions on Audio, Speech and Language Processing, 2016. 
+ + """ + sum_stoi: Tensor + total: Tensor + is_differentiable = False + higher_is_better = True + + def __init__( + self, + fs: int, + extended: bool = False, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable[[Tensor], Tensor]] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + if not _PYSTOI_AVAILABLE: + raise ModuleNotFoundError( + "STOI metric requires that pystoi is installed." + " Either install as `pip install paddlemetrics[audio]` or `pip install pystoi`" + ) + self.fs = fs + self.extended = extended + + self.add_state("sum_stoi", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + stoi_batch = stoi(preds, target, self.fs, self.extended, False).to(self.sum_stoi.device) + + self.sum_stoi += stoi_batch.sum() + self.total += stoi_batch.numel() + + def compute(self) -> Tensor: + """Computes average STOI.""" + return self.sum_stoi / self.total diff --git a/RE/paddlemetric/src/paddlemetrics/classification/__init__.py b/RE/paddlemetric/src/paddlemetrics/classification/__init__.py new file mode 100644 index 00000000..e928018b --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/__init__.py @@ -0,0 +1,34 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from paddlemetrics.classification.accuracy import Accuracy # noqa: F401 +from paddlemetrics.classification.auc import AUC # noqa: F401 +from paddlemetrics.classification.auroc import AUROC # noqa: F401 +from paddlemetrics.classification.average_precision import AveragePrecision # noqa: F401 +from paddlemetrics.classification.binned_precision_recall import BinnedAveragePrecision # noqa: F401 +from paddlemetrics.classification.binned_precision_recall import BinnedPrecisionRecallCurve # noqa: F401 +from paddlemetrics.classification.binned_precision_recall import BinnedRecallAtFixedPrecision # noqa: F401 +from paddlemetrics.classification.calibration_error import CalibrationError # noqa: F401 +from paddlemetrics.classification.cohen_kappa import CohenKappa # noqa: F401 +from paddlemetrics.classification.confusion_matrix import ConfusionMatrix # noqa: F401 +from paddlemetrics.classification.f_beta import F1, FBeta # noqa: F401 +from paddlemetrics.classification.hamming_distance import HammingDistance # noqa: F401 +from paddlemetrics.classification.hinge import Hinge # noqa: F401 +from paddlemetrics.classification.iou import IoU # noqa: F401 +from paddlemetrics.classification.kl_divergence import KLDivergence # noqa: F401 +from paddlemetrics.classification.matthews_corrcoef import MatthewsCorrcoef # noqa: F401 +from paddlemetrics.classification.precision_recall import Precision, Recall # noqa: F401 +from paddlemetrics.classification.precision_recall_curve import PrecisionRecallCurve # noqa: F401 +from paddlemetrics.classification.roc import ROC # noqa: F401 +from paddlemetrics.classification.specificity import Specificity # noqa: F401 +from paddlemetrics.classification.stat_scores import StatScores # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/classification/accuracy.py b/RE/paddlemetric/src/paddlemetrics/classification/accuracy.py new file mode 100644 index 00000000..325a18d4 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/accuracy.py @@ -0,0 +1,276 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.classification.accuracy import ( + _accuracy_compute, + _accuracy_update, + _check_subset_validity, + _mode, + _subset_accuracy_compute, + _subset_accuracy_update, +) +from paddlemetrics.utilities.enums import DataType + +from paddlemetrics.classification.stat_scores import StatScores # isort:skip + + +class Accuracy(StatScores): + r""" + Computes Accuracy_: + + .. math:: + \text{Accuracy} = \frac{1}{N}\sum_i^N 1(y_i = \hat{y}_i) + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a + tensor of predictions. 
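Applied by hand, the formula above is simply the fraction of exactly matching predictions, e.g. with the same values as the docstring example further below:

```python
# Direct application of the accuracy formula above (values from the docstring example).
import numpy as np

target = np.array([0, 1, 2, 3])
preds = np.array([0, 2, 1, 3])
print((preds == target).mean())  # 0.5
```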
+ + For multi-class and multi-dimensional multi-class data with probability or logits predictions, the + parameter ``top_k`` generalizes this metric to a Top-K accuracy metric: for each sample the + top-K highest probability or logit score items are considered to find the correct label. + + For multi-label and multi-dimensional multi-class inputs, this metric computes the "global" + accuracy by default, which counts all labels or sub-samples separately. This can be + changed to subset accuracy (which requires all labels or sub-samples in the sample to + be correctly predicted) by setting ``subset_accuracy=True``. + + Accepts all input types listed in :ref:`references/modules:input types`. + + Args: + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + .. note:: If ``'none'`` and a given class doesn't occur in the `preds` or `target`, + the value for the class will be ``nan``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. 
+ + Should be left at default (``None``) for all other types of inputs. + + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + subset_accuracy: + Whether to compute subset accuracy for multi-label and multi-dimensional + multi-class inputs (has no effect for other input types). + + - For multi-label inputs, if the parameter is set to ``True``, then all labels for + each sample must be correctly predicted for the sample to count as correct. If it + is set to ``False``, then all labels are counted separately - this is equivalent to + flattening inputs beforehand (i.e. ``preds = preds.flatten()`` and same for ``target``). + + - For multi-dimensional multi-class inputs, if the parameter is set to ``True``, then all + sub-sample (on the extra axis) must be correct for the sample to be counted as correct. + If it is set to ``False``, then all sub-samples are counter separately - this is equivalent, + in the case of label predictions, to flattening the inputs beforehand (i.e. + ``preds = preds.flatten()`` and same for ``target``). Note that the ``top_k`` parameter + still applies in both cases, if set. + + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather + + Raises: + ValueError: + If ``top_k`` is not an ``integer`` larger than ``0``. + ValueError: + If ``average`` is none of ``"micro"``, ``"macro"``, ``"weighted"``, ``"samples"``, ``"none"``, ``None``. + ValueError: + If two different input modes are provided, eg. using ``multi-label`` with ``multi-class``. + ValueError: + If ``top_k`` parameter is set for ``multi-label`` inputs. 
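A short sketch contrasting the default "global" accuracy with ``subset_accuracy=True`` on multi-label input, as described above (labels are made up):

```python
# Multi-label example: 3 of 4 individual labels are correct, but only 1 of 2
# samples has all of its labels correct (hypothetical labels).
import paddleext.torchapi as B
from paddlemetrics import Accuracy

target = B.tensor([[1, 1], [0, 1]])
preds = B.tensor([[1, 0], [0, 1]])

print(Accuracy()(preds, target))                      # tensor(0.7500)
print(Accuracy(subset_accuracy=True)(preds, target))  # tensor(0.5000)
```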
+ + Example: + >>> import paddleext.torchapi as B + >>> from paddlemetrics import Accuracy + >>> target = B.tensor([0, 1, 2, 3]) + >>> preds = B.tensor([0, 2, 1, 3]) + >>> accuracy = Accuracy() + >>> accuracy(preds, target) + tensor(0.5000) + + >>> target = B.tensor([0, 1, 2]) + >>> preds = B.tensor([[0.1, 0.9, 0], [0.3, 0.1, 0.6], [0.2, 0.5, 0.3]]) + >>> accuracy = Accuracy(top_k=2) + >>> accuracy(preds, target) + tensor(0.6667) + + """ + is_differentiable = False + correct: Tensor + total: Tensor + + def __init__( + self, + threshold: float = 0.5, + num_classes: Optional[int] = None, + average: str = "micro", + mdmc_average: Optional[str] = "global", + ignore_index: Optional[int] = None, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, + subset_accuracy: bool = False, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + allowed_average = ["micro", "macro", "weighted", "samples", "none", None] + if average not in allowed_average: + raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") + + super().__init__( + reduce="macro" if average in ["weighted", "none", None] else average, + mdmc_reduce=mdmc_average, + threshold=threshold, + top_k=top_k, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.add_state("correct", default=tensor(0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + if top_k is not None and (not isinstance(top_k, int) or top_k <= 0): + raise ValueError(f"The `top_k` should be an integer larger than 0, got {top_k}") + + self.average = average + self.threshold = threshold + self.top_k = top_k + self.subset_accuracy = subset_accuracy + self.mode: DataType = None # type: ignore + self.multiclass = multiclass + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. See + :ref:`references/modules:input types` for more information on input + types. 
+ + Args: + preds: Predictions from model (logits, probabilities, or labels) + target: Ground truth labels + """ + """ returns the mode of the data (binary, multi label, multi class, multi-dim multi class) """ + mode = _mode(preds, target, self.threshold, self.top_k, self.num_classes, self.multiclass) + + if not self.mode: + self.mode = mode + elif self.mode != mode: + raise ValueError(f"You can not use {mode} inputs with {self.mode} inputs.") + + if self.subset_accuracy and not _check_subset_validity(self.mode): + self.subset_accuracy = False + + if self.subset_accuracy: + correct, total = _subset_accuracy_update(preds, target, threshold=self.threshold, top_k=self.top_k) + self.correct += correct + self.total += total + else: + if not self.mode: + raise RuntimeError("You have to have determined mode.") + tp, fp, tn, fn = _accuracy_update( + preds, + target, + reduce=self.reduce, + mdmc_reduce=self.mdmc_reduce, + threshold=self.threshold, + num_classes=self.num_classes, + top_k=self.top_k, + multiclass=self.multiclass, + ignore_index=self.ignore_index, + mode=self.mode, + ) + + # Update states + if self.reduce != "samples" and self.mdmc_reduce != "samplewise": + self.tp += tp + self.fp += fp + self.tn += tn + self.fn += fn + else: + self.tp.append(tp) + self.fp.append(fp) + self.tn.append(tn) + self.fn.append(fn) + + def compute(self) -> Tensor: + """Computes accuracy based on inputs passed in to ``update`` previously.""" + if not self.mode: + raise RuntimeError("You have to have determined mode.") + if self.subset_accuracy: + return _subset_accuracy_compute(self.correct, self.total) + tp, fp, tn, fn = self._get_final_stats() + return _accuracy_compute(tp, fp, tn, fn, self.average, self.mdmc_reduce, self.mode) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/auc.py b/RE/paddlemetric/src/paddlemetrics/classification/auc.py new file mode 100644 index 00000000..99b64048 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/auc.py @@ -0,0 +1,91 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, List, Optional + +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.auc import _auc_compute, _auc_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn +from paddlemetrics.utilities.data import dim_zero_cat + + +class AUC(Metric): + r""" + Computes Area Under the Curve (AUC) using the trapezoidal rule + + Forward accepts two input tensors that should be 1D and have the same number + of elements + + Args: + reorder: AUC expects its first input to be sorted. If this is not the case, + setting this argument to ``True`` will use a stable sorting algorithm to + sort the input in descending order + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. 
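A worked sketch of the trapezoidal rule mentioned above (points are made up; the remaining constructor arguments continue below):

```python
# Trapezoidal area under (x, y): 0.5*(0+1)*0.5 + 0.5*(1+1)*0.5 = 0.75
import paddleext.torchapi as B
from paddlemetrics import AUC

x = B.tensor([0.0, 0.5, 1.0])  # already sorted, so reorder is not needed
y = B.tensor([0.0, 1.0, 1.0])

auc = AUC()
print(auc(x, y))  # tensor(0.7500)
```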
+ process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the ``allgather`` operation on the metric state. When ``None``, DDP + will be used to perform the ``allgather``. + """ + is_differentiable = False + x: List[Tensor] + y: List[Tensor] + + def __init__( + self, + reorder: bool = False, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.reorder = reorder + + self.add_state("x", default=[], dist_reduce_fx="cat") + self.add_state("y", default=[], dist_reduce_fx="cat") + + rank_zero_warn( + "Metric `AUC` will save all targets and predictions in buffer." + " For large datasets this may lead to large memory footprint." + ) + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model (probabilities, or labels) + target: Ground truth labels + """ + x, y = _auc_update(preds, target) + + self.x.append(x) + self.y.append(y) + + def compute(self) -> Tensor: + """Computes AUC based on inputs passed in to ``update`` previously.""" + x = dim_zero_cat(self.x) + y = dim_zero_cat(self.y) + return _auc_compute(x, y, reorder=self.reorder) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/auroc.py b/RE/paddlemetric/src/paddlemetrics/classification/auroc.py new file mode 100644 index 00000000..6236391d --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/auroc.py @@ -0,0 +1,186 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, List, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.auroc import _auroc_compute, _auroc_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn +from paddlemetrics.utilities.data import dim_zero_cat +from paddlemetrics.utilities.enums import DataType +from paddlemetrics.utilities.imports import _TORCH_LOWER_1_6 + + +class AUROC(Metric): + r"""Compute Area Under the Receiver Operating Characteristic Curve (`ROC AUC`_). + Works for both binary, multilabel and multiclass problems. In the case of + multiclass, the values will be calculated based on a one-vs-the-rest approach. + + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor + with probabilities, where C is the number of classes. 
+ + - ``target`` (long tensor): ``(N, ...)`` or ``(N, C, ...)`` with integer labels + + For non-binary input, if the ``preds`` and ``target`` tensor have the same + size the input will be interpretated as multilabel and if ``preds`` have one + dimension more than the ``target`` tensor the input will be interpretated as + multiclass. + + Args: + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + average: + - ``'micro'`` computes metric globally. Only works for multilabel problems + - ``'macro'`` computes metric for each class and uniformly averages them + - ``'weighted'`` computes metric for each class and does a weighted-average, + where each class is weighted by their support (accounts for class imbalance) + - ``None`` computes and returns the metric per class + max_fpr: + If not ``None``, calculates standardized partial AUC over the + range [0, max_fpr]. Should be a float between 0 and 1. + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather + + Raises: + ValueError: + If ``average`` is none of ``None``, ``"macro"`` or ``"weighted"``. + ValueError: + If ``max_fpr`` is not a ``float`` in the range ``(0, 1]``. + RuntimeError: + If ``PyTorch version`` is ``below 1.6`` since max_fpr requires ``B.bucketize`` + which is not available below 1.6. + ValueError: + If the mode of data (binary, multi-label, multi-class) changes between batches. + + Example (binary case): + >>> from paddlemetrics import AUROC + >>> preds = B.tensor([0.13, 0.26, 0.08, 0.19, 0.34]) + >>> target = B.tensor([0, 0, 1, 1, 1]) + >>> auroc = AUROC(pos_label=1) + >>> auroc(preds, target) + tensor(0.5000) + + Example (multiclass case): + >>> preds = B.tensor([[0.90, 0.05, 0.05], + ... [0.05, 0.90, 0.05], + ... [0.05, 0.05, 0.90], + ... [0.85, 0.05, 0.10], + ... 
[0.10, 0.10, 0.80]]) + >>> target = B.tensor([0, 1, 1, 2, 2]) + >>> auroc = AUROC(num_classes=3) + >>> auroc(preds, target) + tensor(0.7778) + + """ + is_differentiable = False + preds: List[Tensor] + target: List[Tensor] + + def __init__( + self, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + average: Optional[str] = "macro", + max_fpr: Optional[float] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.num_classes = num_classes + self.pos_label = pos_label + self.average = average + self.max_fpr = max_fpr + + allowed_average = (None, "macro", "weighted", "micro") + if self.average not in allowed_average: + raise ValueError( + f"Argument `average` expected to be one of the following: {allowed_average} but got {average}" + ) + + if self.max_fpr is not None: + if not isinstance(max_fpr, float) or not 0 < max_fpr <= 1: + raise ValueError(f"`max_fpr` should be a float in range (0, 1], got: {max_fpr}") + + if _TORCH_LOWER_1_6: + raise RuntimeError( + "`max_fpr` argument requires `B.bucketize` which is not available below PyTorch version 1.6" + ) + + self.mode: DataType = None # type: ignore + self.add_state("preds", default=[], dist_reduce_fx="cat") + self.add_state("target", default=[], dist_reduce_fx="cat") + + rank_zero_warn( + "Metric `AUROC` will save all targets and predictions in buffer." + " For large datasets this may lead to large memory footprint." + ) + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model (probabilities, or labels) + target: Ground truth labels + """ + preds, target, mode = _auroc_update(preds, target) + + self.preds.append(preds) + self.target.append(target) + + if self.mode and self.mode != mode: + raise ValueError( + "The mode of data (binary, multi-label, multi-class) should be constant, but changed" + f" between batches from {self.mode} to {mode}" + ) + self.mode = mode + + def compute(self) -> Tensor: + """Computes AUROC based on inputs passed in to ``update`` previously.""" + if not self.mode: + raise RuntimeError("You have to have determined mode.") + preds = dim_zero_cat(self.preds) + target = dim_zero_cat(self.target) + return _auroc_compute( + preds, + target, + self.mode, + self.num_classes, + self.pos_label, + self.average, + self.max_fpr, + ) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/average_precision.py b/RE/paddlemetric/src/paddlemetrics/classification/average_precision.py new file mode 100644 index 00000000..0e37da58 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/average_precision.py @@ -0,0 +1,147 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
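
A minimal usage sketch for the AUC/AUROC modules above, illustrative only: it assumes `paddlemetrics` and the `paddleext.torchapi` backend are importable, and shows the update/compute accumulation pattern these metrics share (state is buffered per batch and concatenated once at the end).

```python
# Illustrative sketch only (assumes paddlemetrics and paddleext.torchapi are importable).
import paddleext.torchapi as B
from paddlemetrics import AUROC

auroc = AUROC(pos_label=1)                      # binary case, as in the docstring example
batches = [
    (B.tensor([0.13, 0.26, 0.08]), B.tensor([0, 0, 1])),
    (B.tensor([0.19, 0.34]), B.tensor([1, 1])),
]
for preds, target in batches:
    auroc.update(preds, target)                 # appends to the preds/target buffers
score = auroc.compute()                         # concatenates buffers and computes ROC AUC
```
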
+from typing import Any, List, Optional, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.average_precision import ( + _average_precision_compute, + _average_precision_update, +) +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn +from paddlemetrics.utilities.data import dim_zero_cat + + +class AveragePrecision(Metric): + """Computes the average precision score, which summarises the precision recall curve into one number. Works for + both binary and multiclass problems. In the case of multiclass, the values will be calculated based on a one- + vs-the-rest approach. + + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor + with probabilities, where C is the number of classes. + + - ``target`` (long tensor): ``(N, ...)`` with integer labels + + Args: + num_classes: integer with number of classes. Not nessesary to provide + for binary problems. + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + average: + defines the reduction that is applied in the case of multiclass and multilabel input. + Should be one of the following: + + - ``'macro'`` [default]: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'micro'``: Calculate the metric globally, across all samples and classes. Cannot be + used with multiclass input. + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support. + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Example (binary case): + >>> from paddlemetrics import AveragePrecision + >>> pred = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 1]) + >>> average_precision = AveragePrecision(pos_label=1) + >>> average_precision(pred, target) + tensor(1.) + + Example (multiclass case): + >>> pred = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... 
[0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> average_precision = AveragePrecision(num_classes=5, average=None) + >>> average_precision(pred, target) + [tensor(1.), tensor(1.), tensor(0.2500), tensor(0.2500), tensor(nan)] + """ + + is_differentiable = False + preds: List[Tensor] + target: List[Tensor] + + def __init__( + self, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + average: Optional[str] = "macro", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + + self.num_classes = num_classes + self.pos_label = pos_label + allowed_average = ("micro", "macro", "weighted", None) + if average not in allowed_average: + raise ValueError(f"Expected argument `average` to be one of {allowed_average}" f" but got {average}") + self.average = average + + self.add_state("preds", default=[], dist_reduce_fx="cat") + self.add_state("target", default=[], dist_reduce_fx="cat") + + rank_zero_warn( + "Metric `AveragePrecision` will save all targets and predictions in buffer." + " For large datasets this may lead to large memory footprint." + ) + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + preds, target, num_classes, pos_label = _average_precision_update( + preds, target, self.num_classes, self.pos_label, self.average + ) + self.preds.append(preds) + self.target.append(target) + self.num_classes = num_classes + self.pos_label = pos_label + + def compute(self) -> Union[Tensor, List[Tensor]]: + """Compute the average precision score. + + Returns: + tensor with average precision. If multiclass will return list + of such tensors, one for each class + """ + preds = dim_zero_cat(self.preds) + target = dim_zero_cat(self.target) + if not self.num_classes: + raise ValueError(f"`num_classes` bas to be positive number, but got {self.num_classes}") + return _average_precision_compute(preds, target, self.num_classes, self.pos_label, self.average) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/binned_precision_recall.py b/RE/paddlemetric/src/paddlemetrics/classification/binned_precision_recall.py new file mode 100644 index 00000000..ffc86ae6 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/binned_precision_recall.py @@ -0,0 +1,324 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
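
As a quick sketch of the `AveragePrecision` module just defined, mirroring its multiclass docstring example (assumes the paddle backend is configured; values and shapes are taken from that example):

```python
# Illustrative sketch only: per-class average precision via forward().
import paddleext.torchapi as B
from paddlemetrics import AveragePrecision

ap = AveragePrecision(num_classes=5, average=None)   # average=None returns one score per class
preds = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05],
                  [0.05, 0.75, 0.05, 0.05, 0.05],
                  [0.05, 0.05, 0.75, 0.05, 0.05],
                  [0.05, 0.05, 0.05, 0.75, 0.05]])
target = B.tensor([0, 1, 3, 2])
per_class = ap(preds, target)   # forward() calls update() and, with compute_on_step=True, compute()
```
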
+from typing import Any, List, Optional, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.average_precision import _average_precision_compute_with_precision_recall +from paddlemetrics.metric import Metric +from paddlemetrics.utilities.data import METRIC_EPS, to_onehot + + +def _recall_at_precision( + precision: Tensor, + recall: Tensor, + thresholds: Tensor, + min_precision: float, +) -> Tuple[Tensor, Tensor]: + try: + max_recall, _, best_threshold = max( + (r, p, t) for p, r, t in zip(precision, recall, thresholds) if p >= min_precision + ) + + except ValueError: + max_recall = B.tensor(0.0, device=recall.device, dtype=recall.dtype) + best_threshold = B.tensor(0) + + if max_recall == 0.0: + best_threshold = B.tensor(1e6, device=thresholds.device, dtype=thresholds.dtype) + + return max_recall, best_threshold + + +class BinnedPrecisionRecallCurve(Metric): + """Computes precision-recall pairs for different thresholds. Works for both binary and multiclass problems. In + the case of multiclass, the values will be calculated based on a one-vs-the-rest approach. + + Computation is performed in constant-memory by computing precision and recall + for ``thresholds`` buckets/thresholds (evenly distributed between 0 and 1). + + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor + with probabilities, where C is the number of classes. + + - ``target`` (long tensor): ``(N, ...)`` or ``(N, C, ...)`` with integer labels + + Args: + num_classes: integer with number of classes. For binary, set to 1. + thresholds: list or tensor with specific thresholds or a number of bins from linear sampling. + It is used for computation will lead to more detailed curve and accurate estimates, + but will be slower and consume more memory. + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Raises: + ValueError: + If ``thresholds`` is not a int, list or tensor + + Example (binary case): + >>> from paddlemetrics import BinnedPrecisionRecallCurve + >>> pred = B.tensor([0, 0.1, 0.8, 0.4]) + >>> target = B.tensor([0, 1, 1, 0]) + >>> pr_curve = BinnedPrecisionRecallCurve(num_classes=1, thresholds=5) + >>> precision, recall, thresholds = pr_curve(pred, target) + >>> precision + tensor([0.5000, 0.5000, 1.0000, 1.0000, 1.0000, 1.0000]) + >>> recall + tensor([1.0000, 0.5000, 0.5000, 0.5000, 0.0000, 0.0000]) + >>> thresholds + tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000]) + + Example (multiclass case): + >>> pred = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... 
[0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> pr_curve = BinnedPrecisionRecallCurve(num_classes=5, thresholds=3) + >>> precision, recall, thresholds = pr_curve(pred, target) + >>> precision # doctest: +NORMALIZE_WHITESPACE + [tensor([0.2500, 1.0000, 1.0000, 1.0000]), + tensor([0.2500, 1.0000, 1.0000, 1.0000]), + tensor([2.5000e-01, 1.0000e-06, 1.0000e+00, 1.0000e+00]), + tensor([2.5000e-01, 1.0000e-06, 1.0000e+00, 1.0000e+00]), + tensor([2.5000e-07, 1.0000e+00, 1.0000e+00, 1.0000e+00])] + >>> recall # doctest: +NORMALIZE_WHITESPACE + [tensor([1.0000, 1.0000, 0.0000, 0.0000]), + tensor([1.0000, 1.0000, 0.0000, 0.0000]), + tensor([1.0000, 0.0000, 0.0000, 0.0000]), + tensor([1.0000, 0.0000, 0.0000, 0.0000]), + tensor([0., 0., 0., 0.])] + >>> thresholds # doctest: +NORMALIZE_WHITESPACE + [tensor([0.0000, 0.5000, 1.0000]), + tensor([0.0000, 0.5000, 1.0000]), + tensor([0.0000, 0.5000, 1.0000]), + tensor([0.0000, 0.5000, 1.0000]), + tensor([0.0000, 0.5000, 1.0000])] + """ + + TPs: Tensor + FPs: Tensor + FNs: Tensor + + def __init__( + self, + num_classes: int, + thresholds: Union[int, Tensor, List[float], None] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + + self.num_classes = num_classes + if isinstance(thresholds, int): + self.num_thresholds = thresholds + thresholds = B.linspace(0, 1.0, thresholds) + self.register_buffer("thresholds", thresholds) + elif thresholds is not None: + if not isinstance(thresholds, (list, Tensor)): + raise ValueError("Expected argument `thresholds` to either be an integer, list of floats or a tensor") + thresholds = B.tensor(thresholds) if isinstance(thresholds, list) else thresholds + self.num_thresholds = thresholds.numel() + self.register_buffer("thresholds", thresholds) + + for name in ("TPs", "FPs", "FNs"): + self.add_state( + name=name, + default=B.zeros(num_classes, self.num_thresholds, dtype=B.float32), + dist_reduce_fx="sum", + ) + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """ + Args + preds: (n_samples, n_classes) tensor + target: (n_samples, n_classes) tensor + """ + # binary case + if len(preds.shape) == len(target.shape) == 1: + preds = preds.reshape(-1, 1) + target = target.reshape(-1, 1) + + if len(preds.shape) == len(target.shape) + 1: + target = to_onehot(target, num_classes=self.num_classes) + + target = target == 1 + # Iterate one threshold at a time to conserve memory + for i in range(self.num_thresholds): + predictions = preds >= self.thresholds[i] + self.TPs[:, i] += (target & predictions).sum(dim=0) + self.FPs[:, i] += ((~target) & (predictions)).sum(dim=0) + self.FNs[:, i] += ((target) & (~predictions)).sum(dim=0) + + def compute(self) -> Union[Tuple[Tensor, Tensor, Tensor], Tuple[List[Tensor], List[Tensor], List[Tensor]]]: + """Returns float tensor of size n_classes.""" + precisions = (self.TPs + METRIC_EPS) / (self.TPs + self.FPs + METRIC_EPS) + recalls = self.TPs / (self.TPs + self.FNs + METRIC_EPS) + + # Need to guarantee that last precision=1 and recall=0, similar to precision_recall_curve + t_ones = B.ones(self.num_classes, 1, dtype=precisions.dtype, device=precisions.device) + precisions = B.cat([precisions, t_ones], dim=1) + t_zeros = B.zeros(self.num_classes, 1, dtype=recalls.dtype, device=recalls.device) + recalls = B.cat([recalls, t_zeros], dim=1) + if 
self.num_classes == 1: + return precisions[0, :], recalls[0, :], self.thresholds + return list(precisions), list(recalls), [self.thresholds for _ in range(self.num_classes)] + + +class BinnedAveragePrecision(BinnedPrecisionRecallCurve): + """Computes the average precision score, which summarises the precision recall curve into one number. Works for + both binary and multiclass problems. In the case of multiclass, the values will be calculated based on a one- + vs-the-rest approach. + + Computation is performed in constant-memory by computing precision and recall + for ``thresholds`` buckets/thresholds (evenly distributed between 0 and 1). + + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor + with probabilities, where C is the number of classes. + + - ``target`` (long tensor): ``(N, ...)`` with integer labels + + Args: + num_classes: integer with number of classes. Not nessesary to provide + for binary problems. + thresholds: list or tensor with specific thresholds or a number of bins from linear sampling. + It is used for computation will lead to more detailed curve and accurate estimates, + but will be slower and consume more memory + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Raises: + ValueError: + If ``thresholds`` is not a list or tensor + + Example (binary case): + >>> from paddlemetrics import BinnedAveragePrecision + >>> pred = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 1]) + >>> average_precision = BinnedAveragePrecision(num_classes=1, thresholds=10) + >>> average_precision(pred, target) + tensor(1.0000) + + Example (multiclass case): + >>> pred = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> average_precision = BinnedAveragePrecision(num_classes=5, thresholds=10) + >>> average_precision(pred, target) + [tensor(1.0000), tensor(1.0000), tensor(0.2500), tensor(0.2500), tensor(-0.)] + """ + + def compute(self) -> Union[List[Tensor], Tensor]: # type: ignore + precisions, recalls, _ = super().compute() + return _average_precision_compute_with_precision_recall(precisions, recalls, self.num_classes, average=None) + + +class BinnedRecallAtFixedPrecision(BinnedPrecisionRecallCurve): + """Computes the higest possible recall value given the minimum precision thresholds provided. + + Computation is performed in constant-memory by computing precision and recall + for ``thresholds`` buckets/thresholds (evenly distributed between 0 and 1). + + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor + with probabilities, where C is the number of classes. + + - ``target`` (long tensor): ``(N, ...)`` with integer labels + + Args: + num_classes: integer with number of classes. Provide 1 for for binary problems. + min_precision: float value specifying minimum precision threshold. + thresholds: list or tensor with specific thresholds or a number of bins from linear sampling. + It is used for computation will lead to more detailed curve and accurate estimates, + but will be slower and consume more memory + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. 
default: True + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Raises: + ValueError: + If ``thresholds`` is not a list or tensor + + Example (binary case): + >>> from paddlemetrics import BinnedRecallAtFixedPrecision + >>> pred = B.tensor([0, 0.2, 0.5, 0.8]) + >>> target = B.tensor([0, 1, 1, 0]) + >>> average_precision = BinnedRecallAtFixedPrecision(num_classes=1, thresholds=10, min_precision=0.5) + >>> average_precision(pred, target) + (tensor(1.0000), tensor(0.1111)) + + Example (multiclass case): + >>> pred = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> average_precision = BinnedRecallAtFixedPrecision(num_classes=5, thresholds=10, min_precision=0.5) + >>> average_precision(pred, target) # doctest: +NORMALIZE_WHITESPACE + (tensor([1.0000, 1.0000, 0.0000, 0.0000, 0.0000]), + tensor([6.6667e-01, 6.6667e-01, 1.0000e+06, 1.0000e+06, 1.0000e+06])) + """ + + def __init__( + self, + num_classes: int, + min_precision: float, + thresholds: Union[int, Tensor, List[float], None] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + num_classes=num_classes, + thresholds=thresholds, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + self.min_precision = min_precision + + def compute(self) -> Tuple[Tensor, Tensor]: # type: ignore + """Returns float tensor of size n_classes.""" + precisions, recalls, thresholds = super().compute() + + if self.num_classes == 1: + return _recall_at_precision(precisions, recalls, thresholds, self.min_precision) + + recalls_at_p = B.zeros(self.num_classes, device=recalls[0].device, dtype=recalls[0].dtype) + thresholds_at_p = B.zeros(self.num_classes, device=thresholds[0].device, dtype=thresholds[0].dtype) + for i in range(self.num_classes): + recalls_at_p[i], thresholds_at_p[i] = _recall_at_precision( + precisions[i], recalls[i], thresholds[i], self.min_precision + ) + return recalls_at_p, thresholds_at_p diff --git a/RE/paddlemetric/src/paddlemetrics/classification/calibration_error.py b/RE/paddlemetric/src/paddlemetrics/classification/calibration_error.py new file mode 100644 index 00000000..5fc9d10a --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/calibration_error.py @@ -0,0 +1,115 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
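
A short, illustrative sketch of the binned (constant-memory) family defined above; it assumes the same import setup as the docstring examples. The key design point is that only `num_classes x num_thresholds` TP/FP/FN counters are kept, so memory does not grow with the number of samples passed to ``update()``, and `BinnedAveragePrecision` / `BinnedRecallAtFixedPrecision` reuse those same counters through subclassing.

```python
# Illustrative sketch only (assumes paddlemetrics and paddleext.torchapi are importable).
import paddleext.torchapi as B
from paddlemetrics import BinnedPrecisionRecallCurve, BinnedRecallAtFixedPrecision

pr_curve = BinnedPrecisionRecallCurve(num_classes=1, thresholds=5)   # 5 evenly spaced bins in [0, 1]
precision, recall, thresholds = pr_curve(B.tensor([0.0, 0.1, 0.8, 0.4]),
                                         B.tensor([0, 1, 1, 0]))

recall_at_p = BinnedRecallAtFixedPrecision(num_classes=1, thresholds=10, min_precision=0.5)
best_recall, best_threshold = recall_at_p(B.tensor([0.0, 0.2, 0.5, 0.8]),
                                          B.tensor([0, 1, 1, 0]))
```
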
+from typing import Any, List, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.calibration_error import _ce_compute, _ce_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities.data import dim_zero_cat + + +class CalibrationError(Metric): + r""" + + `Computes the Top-label Calibration Error`_ + Three different norms are implemented, each corresponding to variations on the calibration error metric. + + L1 norm (Expected Calibration Error) + + .. math:: + \text{ECE} = \frac{1}{N}\sum_i^N \|(p_i - c_i)\| + + Infinity norm (Maximum Calibration Error) + + .. math:: + \text{RMSCE} = \max_{i} (p_i - c_i) + + L2 norm (Root Mean Square Calibration Error) + + .. math:: + \text{MCE} = \frac{1}{N}\sum_i^N (p_i - c_i)^2 + + Where :math:`p_i` is the top-1 prediction accuracy in bin i + and :math:`c_i` is the average confidence of predictions in bin i. + + .. note:: + L2-norm debiasing is not yet supported. + + Args: + n_bins: Number of bins to use when computing probabilites and accuracies. + norm: Norm used to compare empirical and expected probability bins. + Defaults to "l1", or Expected Calibration Error. + debias: Applies debiasing term, only implemented for l2 norm. Defaults to True. + compute_on_step: Forward only calls ``update()`` and return None if this is set to False. + dist_sync_on_step: Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: Specify the process group on which synchronization is called. + default: None (which selects the entire world) + """ + DISTANCES = {"l1", "l2", "max"} + confidences: List[Tensor] + accuracies: List[Tensor] + + def __init__( + self, + n_bins: int = 15, + norm: str = "l1", + compute_on_step: bool = False, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ): + + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=None, + ) + + if norm not in self.DISTANCES: + raise ValueError(f"Norm {norm} is not supported. Please select from l1, l2, or max. ") + + if not isinstance(n_bins, int) or n_bins <= 0: + raise ValueError(f"Expected argument `n_bins` to be a int larger than 0 but got {n_bins}") + self.n_bins = n_bins + self.register_buffer("bin_boundaries", B.linspace(0, 1, n_bins + 1)) + self.norm = norm + + self.add_state("confidences", [], dist_reduce_fx="cat") + self.add_state("accuracies", [], dist_reduce_fx="cat") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Computes top-level confidences and accuracies for the input probabilites and appends them to internal + state. + + Args: + preds (Tensor): Model output probabilities. + target (Tensor): Ground-truth target class labels. + """ + confidences, accuracies = _ce_update(preds, target) + + self.confidences.append(confidences) + self.accuracies.append(accuracies) + + def compute(self) -> Tensor: + """Computes calibration error across all confidences and accuracies. + + Returns: + Tensor: Calibration error across previously collected examples. 
+ """ + confidences = dim_zero_cat(self.confidences) + accuracies = dim_zero_cat(self.accuracies) + return _ce_compute(confidences, accuracies, self.bin_boundaries, norm=self.norm) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/cohen_kappa.py b/RE/paddlemetric/src/paddlemetrics/classification/cohen_kappa.py new file mode 100644 index 00000000..3a4817cf --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/cohen_kappa.py @@ -0,0 +1,119 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.cohen_kappa import _cohen_kappa_compute, _cohen_kappa_update +from paddlemetrics.metric import Metric + + +class CohenKappa(Metric): + r""" + Calculates `Cohen's kappa score`_ that measures + inter-annotator agreement. It is defined as + + .. math:: + \kappa = (p_o - p_e) / (1 - p_e) + + where :math:`p_o` is the empirical probability of agreement and :math:`p_e` is + the expected agreement when both annotators assign labels randomly. Note that + :math:`p_e` is estimated using a per-annotator empirical prior over the + class labels. + + Works with binary, multiclass, and multilabel data. Accepts probabilities from a model output or + integer class values in prediction. Works with multi-dimensional preds and target. + + Forward accepts + - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes + + - ``target`` (long tensor): ``(N, ...)`` + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument + to convert into integer labels. This is the case for binary and multi-label probabilities or logits. + + If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. + + Args: + num_classes: Number of classes in the dataset. + + weights: Weighting type to calculate the score. Choose from + - ``None`` or ``'none'``: no weighting + - ``'linear'``: linear weighting + - ``'quadratic'``: quadratic weighting + + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + + process_group: + Specify the process group on which synchronization is called. 
default: None (which selects the entire world) + + Example: + >>> from paddlemetrics import CohenKappa + >>> target = B.tensor([1, 1, 0, 0]) + >>> preds = B.tensor([0, 1, 0, 0]) + >>> cohenkappa = CohenKappa(num_classes=2) + >>> cohenkappa(preds, target) + tensor(0.5000) + + """ + is_differentiable = False + confmat: Tensor + + def __init__( + self, + num_classes: int, + weights: Optional[str] = None, + threshold: float = 0.5, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + self.num_classes = num_classes + self.weights = weights + self.threshold = threshold + + allowed_weights = ("linear", "quadratic", "none", None) + if self.weights not in allowed_weights: + raise ValueError(f"Argument weights needs to one of the following: {allowed_weights}") + + self.add_state("confmat", default=B.zeros(num_classes, num_classes), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + confmat = _cohen_kappa_update(preds, target, self.num_classes, self.threshold) + self.confmat += confmat + + def compute(self) -> Tensor: + """Computes cohen kappa score.""" + return _cohen_kappa_compute(self.confmat, self.weights) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/confusion_matrix.py b/RE/paddlemetric/src/paddlemetrics/classification/confusion_matrix.py new file mode 100644 index 00000000..a3485570 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/confusion_matrix.py @@ -0,0 +1,141 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.confusion_matrix import _confusion_matrix_compute, _confusion_matrix_update +from paddlemetrics.metric import Metric + + +class ConfusionMatrix(Metric): + r""" + Computes the `confusion matrix`_. Works with binary, + multiclass, and multilabel data. Accepts probabilities or logits from a model output or integer class + values in prediction. Works with multi-dimensional preds and target, but it should be noted that + additional dimensions will be flattened. + + Forward accepts + + - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes + - ``target`` (long tensor): ``(N, ...)`` + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument + to convert into integer labels. This is the case for binary and multi-label probabilities or logits. + + If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. 
+ + If working with multilabel data, setting the `is_multilabel` argument to `True` will make sure that a + `confusion matrix gets calculated per label`_. + + Args: + num_classes: Number of classes in the dataset. + normalize: Normalization mode for confusion matrix. Choose from + + - ``None`` or ``'none'``: no normalization (default) + - ``'true'``: normalization over the targets (most commonly used) + - ``'pred'``: normalization over the predictions + - ``'all'``: normalization over the whole matrix + + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + + multilabel: + determines if data is multilabel or not. + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Example (binary data): + >>> from paddlemetrics import ConfusionMatrix + >>> target = B.tensor([1, 1, 0, 0]) + >>> preds = B.tensor([0, 1, 0, 0]) + >>> confmat = ConfusionMatrix(num_classes=2) + >>> confmat(preds, target) + tensor([[2., 0.], + [1., 1.]]) + + Example (multiclass data): + >>> target = B.tensor([2, 1, 0, 0]) + >>> preds = B.tensor([2, 1, 0, 1]) + >>> confmat = ConfusionMatrix(num_classes=3) + >>> confmat(preds, target) + tensor([[1., 1., 0.], + [0., 1., 0.], + [0., 0., 1.]]) + + Example (multilabel data): + >>> target = B.tensor([[0, 1, 0], [1, 0, 1]]) + >>> preds = B.tensor([[0, 0, 1], [1, 0, 1]]) + >>> confmat = ConfusionMatrix(num_classes=3, multilabel=True) + >>> confmat(preds, target) # doctest: +NORMALIZE_WHITESPACE + tensor([[[1., 0.], [0., 1.]], + [[1., 0.], [1., 0.]], + [[0., 1.], [0., 1.]]]) + + """ + is_differentiable = False + confmat: Tensor + + def __init__( + self, + num_classes: int, + normalize: Optional[str] = None, + threshold: float = 0.5, + multilabel: bool = False, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + self.num_classes = num_classes + self.normalize = normalize + self.threshold = threshold + self.multilabel = multilabel + + allowed_normalize = ("true", "pred", "all", "none", None) + if self.normalize not in allowed_normalize: + raise ValueError(f"Argument average needs to one of the following: {allowed_normalize}") + + default = B.zeros(num_classes, 2, 2) if multilabel else B.zeros(num_classes, num_classes) + self.add_state("confmat", default=default, dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + confmat = _confusion_matrix_update(preds, target, self.num_classes, self.threshold, self.multilabel) + self.confmat += confmat + + def compute(self) -> Tensor: + """Computes confusion matrix. 
+ + Returns: + If `multilabel=False` this will be a `[n_classes, n_classes]` tensor and if `multilabel=True` + this will be a `[n_classes, 2, 2]` tensor + """ + return _confusion_matrix_compute(self.confmat, self.normalize) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/f_beta.py b/RE/paddlemetric/src/paddlemetrics/classification/f_beta.py new file mode 100644 index 00000000..4b24dc0e --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/f_beta.py @@ -0,0 +1,301 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.classification.stat_scores import StatScores +from paddlemetrics.functional.classification.f_beta import _fbeta_compute +from paddlemetrics.utilities.enums import AverageMethod + + +class FBeta(StatScores): + r""" + Computes `F-score`_, specifically: + + .. math:: + F_\beta = (1 + \beta^2) * \frac{\text{precision} * \text{recall}} + {(\beta^2 * \text{precision}) + \text{recall}} + + Where :math:`\beta` is some positive real factor. Works with binary, multiclass, and multilabel data. + Accepts logit scores or probabilities from a model output or integer class values in prediction. + Works with multi-dimensional preds and target. + + Forward accepts + + - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes + - ``target`` (long tensor): ``(N, ...)`` + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument + to convert into integer labels. This is the case for binary and multi-label logits and probabilities. + + If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. + + Args: + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + beta: + Beta coefficient in the F measure. + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. 
note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + .. note:: If ``'none'`` and a given class doesn't occur in the `preds` or `target`, + the value for the class will be ``nan``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather. + + Raises: + ValueError: + If ``average`` is none of ``"micro"``, ``"macro"``, ``"weighted"``, ``"none"``, ``None``. 
+ + Example: + >>> from paddlemetrics import FBeta + >>> target = B.tensor([0, 1, 2, 0, 1, 2]) + >>> preds = B.tensor([0, 2, 1, 0, 0, 1]) + >>> f_beta = FBeta(num_classes=3, beta=0.5) + >>> f_beta(preds, target) + tensor(0.3333) + + """ + + def __init__( + self, + num_classes: Optional[int] = None, + beta: float = 1.0, + threshold: float = 0.5, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + self.beta = beta + allowed_average = list(AverageMethod) + if average not in allowed_average: + raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") + + super().__init__( + reduce="macro" if average in [AverageMethod.WEIGHTED, AverageMethod.NONE] else average, + mdmc_reduce=mdmc_average, + threshold=threshold, + top_k=top_k, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.average = average + + def compute(self) -> Tensor: + """Computes fbeta over state.""" + tp, fp, tn, fn = self._get_final_stats() + return _fbeta_compute(tp, fp, tn, fn, self.beta, self.ignore_index, self.average, self.mdmc_reduce) + + +class F1(FBeta): + """Computes F1 metric. F1 metrics correspond to a harmonic mean of the precision and recall scores. + + Works with binary, multiclass, and multilabel data. Accepts logits or probabilities from a model + output or integer class values in prediction. Works with multi-dimensional preds and target. + + Forward accepts + + - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes + - ``target`` (long tensor): ``(N, ...)`` + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument. + This is the case for binary and multi-label logits. + + If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. + + Args: + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. 
+ + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather. 
+ + + Example: + >>> from paddlemetrics import F1 + >>> target = B.tensor([0, 1, 2, 0, 1, 2]) + >>> preds = B.tensor([0, 2, 1, 0, 0, 1]) + >>> f1 = F1(num_classes=3) + >>> f1(preds, target) + tensor(0.3333) + """ + + is_differentiable = False + + def __init__( + self, + num_classes: Optional[int] = None, + threshold: float = 0.5, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + num_classes=num_classes, + beta=1.0, + threshold=threshold, + average=average, + mdmc_average=mdmc_average, + ignore_index=ignore_index, + top_k=top_k, + multiclass=multiclass, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/hamming_distance.py b/RE/paddlemetric/src/paddlemetrics/classification/hamming_distance.py new file mode 100644 index 00000000..855d7f7e --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/hamming_distance.py @@ -0,0 +1,110 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.classification.hamming_distance import _hamming_distance_compute, _hamming_distance_update +from paddlemetrics.metric import Metric + + +class HammingDistance(Metric): + r""" + Computes the average `Hamming distance`_ (also + known as Hamming loss) between targets and predictions: + + .. math:: + \text{Hamming distance} = \frac{1}{N \cdot L}\sum_i^N \sum_l^L 1(y_{il} \neq \hat{y_{il}}) + + Where :math:`y` is a tensor of target values, :math:`\hat{y}` is a tensor of predictions, + and :math:`\bullet_{il}` refers to the :math:`l`-th label of the :math:`i`-th sample of that + tensor. + + This is the same as ``1-accuracy`` for binary data, while for all other types of inputs it + treats each possible label separately - meaning that, for example, multi-class data is + treated as if it were multi-label. + + Accepts all input types listed in :ref:`references/modules:input types`. + + Args: + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. 
+ default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the all gather. + + Raises: + ValueError: + If ``threshold`` is not between ``0`` and ``1``. + + Example: + >>> from paddlemetrics import HammingDistance + >>> target = B.tensor([[0, 1], [1, 1]]) + >>> preds = B.tensor([[0, 1], [0, 1]]) + >>> hamming_distance = HammingDistance() + >>> hamming_distance(preds, target) + tensor(0.2500) + + """ + is_differentiable = False + correct: Tensor + total: Tensor + + def __init__( + self, + threshold: float = 0.5, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.add_state("correct", default=tensor(0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + self.threshold = threshold + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. See + :ref:`references/modules:input types` for more information on input + types. + + Args: + preds: Predictions from model (probabilities, logits or labels) + target: Ground truth labels + """ + correct, total = _hamming_distance_update(preds, target, self.threshold) + + self.correct += correct + self.total += total + + def compute(self) -> Tensor: + """Computes hamming distance based on inputs passed in to ``update`` previously.""" + return _hamming_distance_compute(self.correct, self.total) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/hinge.py b/RE/paddlemetric/src/paddlemetrics/classification/hinge.py new file mode 100644 index 00000000..099864a3 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/hinge.py @@ -0,0 +1,127 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional, Union + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.classification.hinge import MulticlassMode, _hinge_compute, _hinge_update +from paddlemetrics.metric import Metric + + +class Hinge(Metric): + r""" + Computes the mean `Hinge loss`_, typically used for Support Vector + Machines (SVMs). In the binary case it is defined as: + + .. math:: + \text{Hinge loss} = \max(0, 1 - y \times \hat{y}) + + Where :math:`y \in {-1, 1}` is the target, and :math:`\hat{y} \in \mathbb{R}` is the prediction. + + In the multi-class case, when ``multiclass_mode=None`` (default), ``multiclass_mode=MulticlassMode.CRAMMER_SINGER`` + or ``multiclass_mode="crammer-singer"``, this metric will compute the multi-class hinge loss defined by Crammer and + Singer as: + + .. 
math:: + \text{Hinge loss} = \max\left(0, 1 - \hat{y}_y + \max_{i \ne y} (\hat{y}_i)\right) + + Where :math:`y \in {0, ..., \mathrm{C}}` is the target class (where :math:`\mathrm{C}` is the number of classes), + and :math:`\hat{y} \in \mathbb{R}^\mathrm{C}` is the predicted output per class. + + In the multi-class case when ``multiclass_mode=MulticlassMode.ONE_VS_ALL`` or ``multiclass_mode='one-vs-all'``, this + metric will use a one-vs-all approach to compute the hinge loss, giving a vector of C outputs where each entry pits + that class against all remaining classes. + + This metric can optionally output the mean of the squared hinge loss by setting ``squared=True`` + + Only accepts inputs with preds shape of (N) (binary) or (N, C) (multi-class) and target shape of (N). + + Args: + squared: + If True, this will compute the squared hinge loss. Otherwise, computes the regular hinge loss (default). + multiclass_mode: + Which approach to use for multi-class inputs (has no effect in the binary case). ``None`` (default), + ``MulticlassMode.CRAMMER_SINGER`` or ``"crammer-singer"``, uses the Crammer Singer multi-class hinge loss. + ``MulticlassMode.ONE_VS_ALL`` or ``"one-vs-all"`` computes the hinge loss in a one-vs-all fashion. + + Raises: + ValueError: + If ``multiclass_mode`` is not: None, ``MulticlassMode.CRAMMER_SINGER``, ``"crammer-singer"``, + ``MulticlassMode.ONE_VS_ALL`` or ``"one-vs-all"``. + + Example (binary case): + >>> import paddleext.torchapi as B + >>> from paddlemetrics import Hinge + >>> target = B.tensor([0, 1, 1]) + >>> preds = B.tensor([-2.2, 2.4, 0.1]) + >>> hinge = Hinge() + >>> hinge(preds, target) + tensor(0.3000) + + Example (default / multiclass case): + >>> target = B.tensor([0, 1, 2]) + >>> preds = B.tensor([[-1.0, 0.9, 0.2], [0.5, -1.1, 0.8], [2.2, -0.5, 0.3]]) + >>> hinge = Hinge() + >>> hinge(preds, target) + tensor(2.9000) + + Example (multiclass example, one vs all mode): + >>> target = B.tensor([0, 1, 2]) + >>> preds = B.tensor([[-1.0, 0.9, 0.2], [0.5, -1.1, 0.8], [2.2, -0.5, 0.3]]) + >>> hinge = Hinge(multiclass_mode="one-vs-all") + >>> hinge(preds, target) + tensor([2.2333, 1.5000, 1.2333]) + + """ + is_differentiable = True + measure: Tensor + total: Tensor + + def __init__( + self, + squared: bool = False, + multiclass_mode: Optional[Union[str, MulticlassMode]] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.add_state("measure", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + if multiclass_mode not in (None, MulticlassMode.CRAMMER_SINGER, MulticlassMode.ONE_VS_ALL): + raise ValueError( + "The `multiclass_mode` should be either None / 'crammer-singer' / MulticlassMode.CRAMMER_SINGER" + " (default) or 'one-vs-all' / MulticlassMode.ONE_VS_ALL," + f" got {multiclass_mode}."
+ ) + + self.squared = squared + self.multiclass_mode = multiclass_mode + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + measure, total = _hinge_update(preds, target, squared=self.squared, multiclass_mode=self.multiclass_mode) + + self.measure = measure + self.measure + self.total = total + self.total + + def compute(self) -> Tensor: + return _hinge_compute(self.measure, self.total) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/iou.py b/RE/paddlemetric/src/paddlemetrics/classification/iou.py new file mode 100644 index 00000000..9e89946a --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/iou.py @@ -0,0 +1,107 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.classification.confusion_matrix import ConfusionMatrix +from paddlemetrics.functional.classification.iou import _iou_from_confmat + + +class IoU(ConfusionMatrix): + r""" + Computes Intersection over union, or `Jaccard index`_: + + .. math:: J(A,B) = \frac{|A\cap B|}{|A\cup B|} + + Where: :math:`A` and :math:`B` are both tensors of the same size, containing integer class values. + They may be subject to conversion from input data (see description below). Note that it is different from box IoU. + + Works with binary, multiclass and multi-label data. + Accepts probabilities from a model output or integer class values in prediction. + Works with multi-dimensional preds and target. + + Forward accepts + + - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes + - ``target`` (long tensor): ``(N, ...)`` + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument + to convert into integer labels. This is the case for binary and multi-label probabilities. + + If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. + + Args: + num_classes: Number of classes in the dataset. + ignore_index: optional int specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. Has no effect if given an int that is not in the + range [0, num_classes-1]. By default, no index is ignored, and all classes are used. + absent_score: score to use for an individual class, if no instances of the class index were present in + `pred` AND no instances of the class index were present in `target`. For example, if we have 3 classes, + [0, 0] for `pred`, and [0, 2] for `target`, then class 1 would be assigned the `absent_score`. + threshold: + Threshold value for binary or multi-label probabilities. + reduction: a method to reduce metric score over labels. 
+ + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Example: + >>> from paddlemetrics import IoU + >>> target = B.randint(0, 2, (10, 25, 25)) + >>> pred = B.tensor(target) + >>> pred[2:5, 7:13, 9:15] = 1 - pred[2:5, 7:13, 9:15] + >>> iou = IoU(num_classes=2) + >>> iou(pred, target) + tensor(0.9660) + + """ + is_differentiable = False + + def __init__( + self, + num_classes: int, + ignore_index: Optional[int] = None, + absent_score: float = 0.0, + threshold: float = 0.5, + reduction: str = "elementwise_mean", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + num_classes=num_classes, + normalize=None, + threshold=threshold, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + self.reduction = reduction + self.ignore_index = ignore_index + self.absent_score = absent_score + + def compute(self) -> Tensor: + """Computes intersection over union (IoU)""" + return _iou_from_confmat(self.confmat, self.num_classes, self.ignore_index, self.absent_score, self.reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/kl_divergence.py b/RE/paddlemetric/src/paddlemetrics/classification/kl_divergence.py new file mode 100644 index 00000000..cce887f0 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/kl_divergence.py @@ -0,0 +1,109 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.kl_divergence import _kld_compute, _kld_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities.data import dim_zero_cat + + +class KLDivergence(Metric): + r"""Computes the `KL divergence`_: + + .. math:: + D_{KL}(P||Q) = \sum_{x\in\mathcal{X}} P(x) \log\frac{P(x)}{Q(x)} + + Where :math:`P` and :math:`Q` are probability distributions where :math:`P` usually represents a distribution + over data and :math:`Q` is often a prior or approximation of :math:`P`. It should be noted that the KL divergence + is a non-symmetric metric i.e. :math:`D_{KL}(P||Q) \neq D_{KL}(Q||P)`. + + Args: + p: data distribution with shape ``[N, d]`` + q: prior or approximate distribution with shape ``[N, d]`` + log_prob: bool indicating if input is log-probabilities or probabilities.
If given as probabilities, + will normalize to make sure the distributions sum to 1 + reduction: + Determines how to reduce over the ``N``/batch dimension: + + - ``'mean'`` [default]: Averages score across samples + - ``'sum'``: Sum score across samples + - ``'none'`` or ``None``: Returns score per sample + + Raises: + TypeError: + If ``log_prob`` is not a ``bool`` + ValueError: + If ``reduction`` is not one of ``'mean'``, ``'sum'``, ``'none'`` or ``None`` + + .. note:: + Half precision is only supported on GPU for this metric + + Example: + >>> import paddleext.torchapi as B + >>> from paddlemetrics.functional import kl_divergence + >>> p = B.tensor([[0.36, 0.48, 0.16]]) + >>> q = B.tensor([[1/3, 1/3, 1/3]]) + >>> kl_divergence(p, q) + tensor(0.0853) + + """ + is_differentiable = True + # TODO: cannot be used because of scripting + # measures: Union[List[Tensor], Tensor] + total: Tensor + + def __init__( + self, + log_prob: bool = False, + reduction: Optional[str] = "mean", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + if not isinstance(log_prob, bool): + raise TypeError(f"Expected argument `log_prob` to be bool but got {log_prob}") + self.log_prob = log_prob + + allowed_reduction = ["mean", "sum", "none", None] + if reduction not in allowed_reduction: + raise ValueError(f"Expected argument `reduction` to be one of {allowed_reduction} but got {reduction}") + self.reduction = reduction + + if self.reduction in ["mean", "sum"]: + self.add_state("measures", B.zeros(1), dist_reduce_fx="sum") + else: + self.add_state("measures", [], dist_reduce_fx="cat") + self.add_state("total", B.zeros(1), dist_reduce_fx="sum") + + def update(self, p: Tensor, q: Tensor) -> None: # type: ignore + measures, total = _kld_update(p, q, self.log_prob) + if self.reduction is None or self.reduction == "none": + self.measures.append(measures) + else: + self.measures += measures.sum() + self.total += total + + def compute(self) -> Tensor: + measures = dim_zero_cat(self.measures) if self.reduction is None or self.reduction == "none" else self.measures + return _kld_compute(measures, self.total, self.reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/matthews_corrcoef.py b/RE/paddlemetric/src/paddlemetrics/classification/matthews_corrcoef.py new file mode 100644 index 00000000..2ea52673 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/matthews_corrcoef.py @@ -0,0 +1,111 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
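+# A quick illustrative hand-check of the MCC formula implemented in this module, using the same +# inputs as the ``MatthewsCorrcoef`` docstring example below; the counts TP=1, FP=0, TN=2, FN=1 +# are assumed from comparing preds=[0, 1, 0, 0] against target=[1, 1, 0, 0] with class 1 as positive: +# +# import math +# tp, fp, tn, fn = 1, 0, 2, 1 +# mcc = (tp * tn - fp * fn) / math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) +# round(mcc, 4) # 0.5774, matching tensor(0.5774) in the example below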
+from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.matthews_corrcoef import ( + _matthews_corrcoef_compute, + _matthews_corrcoef_update, +) +from paddlemetrics.metric import Metric + + +class MatthewsCorrcoef(Metric): + r""" + Calculates `Matthews correlation coefficient`_ that measures + the general correlation or quality of a classification. In the binary case it + is defined as: + + .. math:: + MCC = \frac{TP*TN - FP*FN}{\sqrt{(TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)}} + + where TP, TN, FP and FN are respectively the true positives, true negatives, + false positives and false negatives. Also works in the case of multi-label or + multi-class input. + + Forward accepts + + - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes + - ``target`` (long tensor): ``(N, ...)`` + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument + to convert into integer labels. This is the case for binary and multi-label probabilities. + + If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. + + Args: + num_classes: Number of classes in the dataset. + threshold: + Threshold value for binary or multi-label probabilities. default: 0.5 + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather + + Example: + >>> from paddlemetrics import MatthewsCorrcoef + >>> target = B.tensor([1, 1, 0, 0]) + >>> preds = B.tensor([0, 1, 0, 0]) + >>> matthews_corrcoef = MatthewsCorrcoef(num_classes=2) + >>> matthews_corrcoef(preds, target) + tensor(0.5774) + + """ + is_differentiable = False + confmat: Tensor + + def __init__( + self, + num_classes: int, + threshold: float = 0.5, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + self.num_classes = num_classes + self.threshold = threshold + + self.add_state("confmat", default=B.zeros(num_classes, num_classes), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets.
+ + Args: + preds: Predictions from model + target: Ground truth values + """ + confmat = _matthews_corrcoef_update(preds, target, self.num_classes, self.threshold) + self.confmat += confmat + + def compute(self) -> Tensor: + """Computes matthews correlation coefficient.""" + return _matthews_corrcoef_compute(self.confmat) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/precision_recall.py b/RE/paddlemetric/src/paddlemetrics/classification/precision_recall.py new file mode 100644 index 00000000..77920cfc --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/precision_recall.py @@ -0,0 +1,320 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.classification.stat_scores import StatScores +from paddlemetrics.functional.classification.precision_recall import _precision_compute, _recall_compute + + +class Precision(StatScores): + r""" + Computes `Precision`_: + + .. math:: \text{Precision} = \frac{\text{TP}}{\text{TP} + \text{FP}} + + Where :math:`\text{TP}` and :math:`\text{FP}` represent the number of true positives and + false positives respecitively. With the use of ``top_k`` parameter, this metric can + generalize to Precision@K. + + The reduction method (how the precision scores are aggregated) is controlled by the + ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the + multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). 
Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather. + + Raises: + ValueError: + If ``average`` is none of ``"micro"``, ``"macro"``, ``"weighted"``, ``"samples"``, ``"none"``, ``None``. 
+ + Example: + >>> from paddlemetrics import Precision + >>> preds = B.tensor([2, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> precision = Precision(average='macro', num_classes=3) + >>> precision(preds, target) + tensor(0.1667) + >>> precision = Precision(average='micro') + >>> precision(preds, target) + tensor(0.2500) + + """ + is_differentiable = False + + def __init__( + self, + num_classes: Optional[int] = None, + threshold: float = 0.5, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + allowed_average = ["micro", "macro", "weighted", "samples", "none", None] + if average not in allowed_average: + raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") + + super().__init__( + reduce="macro" if average in ["weighted", "none", None] else average, + mdmc_reduce=mdmc_average, + threshold=threshold, + top_k=top_k, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.average = average + + def compute(self) -> Tensor: + """Computes the precision score based on inputs passed in to ``update`` previously. + + Return: + The shape of the returned tensor depends on the ``average`` parameter + + - If ``average in ['micro', 'macro', 'weighted', 'samples']``, a one-element tensor will be returned + - If ``average in ['none', None]``, the shape will be ``(C,)``, where ``C`` stands for the number + of classes + """ + tp, fp, _, fn = self._get_final_stats() + return _precision_compute(tp, fp, fn, self.average, self.mdmc_reduce) + + +class Recall(StatScores): + r""" + Computes `Recall`_: + + .. math:: \text{Recall} = \frac{\text{TP}}{\text{TP} + \text{FN}} + + Where :math:`\text{TP}` and :math:`\text{FN}` represent the number of true positives and + false negatives respecitively. With the use of ``top_k`` parameter, this metric can + generalize to Recall@K. + + The reduction method (how the recall scores are aggregated) is controlled by the + ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the + multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. 
+ - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather. + + Raises: + ValueError: + If ``average`` is none of ``"micro"``, ``"macro"``, ``"weighted"``, ``"samples"``, ``"none"``, ``None``. 
+ + Example: + >>> from paddlemetrics import Recall + >>> preds = B.tensor([2, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> recall = Recall(average='macro', num_classes=3) + >>> recall(preds, target) + tensor(0.3333) + >>> recall = Recall(average='micro') + >>> recall(preds, target) + tensor(0.2500) + + """ + is_differentiable = False + + def __init__( + self, + num_classes: Optional[int] = None, + threshold: float = 0.5, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + allowed_average = ["micro", "macro", "weighted", "samples", "none", None] + if average not in allowed_average: + raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") + + super().__init__( + reduce="macro" if average in ["weighted", "none", None] else average, + mdmc_reduce=mdmc_average, + threshold=threshold, + top_k=top_k, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.average = average + + def compute(self) -> Tensor: + """Computes the recall score based on inputs passed in to ``update`` previously. + + Return: + The shape of the returned tensor depends on the ``average`` parameter + + - If ``average in ['micro', 'macro', 'weighted', 'samples']``, a one-element tensor will be returned + - If ``average in ['none', None]``, the shape will be ``(C,)``, where ``C`` stands for the number + of classes + """ + tp, fp, _, fn = self._get_final_stats() + return _recall_compute(tp, fp, fn, self.average, self.mdmc_reduce) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/precision_recall_curve.py b/RE/paddlemetric/src/paddlemetrics/classification/precision_recall_curve.py new file mode 100644 index 00000000..34141909 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/precision_recall_curve.py @@ -0,0 +1,149 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, List, Optional, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.precision_recall_curve import ( + _precision_recall_curve_compute, + _precision_recall_curve_update, +) +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn +from paddlemetrics.utilities.data import dim_zero_cat + + +class PrecisionRecallCurve(Metric): + """Computes precision-recall pairs for different thresholds. Works for both binary and multiclass problems. In + the case of multiclass, the values will be calculated based on a one-vs-the-rest approach. 
+ + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor + with probabilities, where C is the number of classes. + + - ``target`` (long tensor): ``(N, ...)`` or ``(N, C, ...)`` with integer labels + + Args: + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Example (binary case): + >>> from paddlemetrics import PrecisionRecallCurve + >>> pred = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 0]) + >>> pr_curve = PrecisionRecallCurve(pos_label=1) + >>> precision, recall, thresholds = pr_curve(pred, target) + >>> precision + tensor([0.6667, 0.5000, 0.0000, 1.0000]) + >>> recall + tensor([1.0000, 0.5000, 0.0000, 0.0000]) + >>> thresholds + tensor([1, 2, 3]) + + Example (multiclass case): + >>> pred = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> pr_curve = PrecisionRecallCurve(num_classes=5) + >>> precision, recall, thresholds = pr_curve(pred, target) + >>> precision # doctest: +NORMALIZE_WHITESPACE + [tensor([1., 1.]), tensor([1., 1.]), tensor([0.2500, 0.0000, 1.0000]), + tensor([0.2500, 0.0000, 1.0000]), tensor([0., 1.])] + >>> recall + [tensor([1., 0.]), tensor([1., 0.]), tensor([1., 0., 0.]), tensor([1., 0., 0.]), tensor([nan, 0.])] + >>> thresholds + [tensor([0.7500]), tensor([0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500])] + """ + + is_differentiable = False + preds: List[Tensor] + target: List[Tensor] + + def __init__( + self, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + + self.num_classes = num_classes + self.pos_label = pos_label + + self.add_state("preds", default=[], dist_reduce_fx="cat") + self.add_state("target", default=[], dist_reduce_fx="cat") + + rank_zero_warn( + "Metric `PrecisionRecallCurve` will save all targets and predictions in buffer." + " For large datasets this may lead to large memory footprint." + ) + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. 
+ + Args: + preds: Predictions from model + target: Ground truth values + """ + preds, target, num_classes, pos_label = _precision_recall_curve_update( + preds, target, self.num_classes, self.pos_label + ) + self.preds.append(preds) + self.target.append(target) + self.num_classes = num_classes + self.pos_label = pos_label + + def compute(self) -> Union[Tuple[Tensor, Tensor, Tensor], Tuple[List[Tensor], List[Tensor], List[Tensor]]]: + """Compute the precision-recall curve. + + Returns: + 3-element tuple containing + + precision: + tensor where element i is the precision of predictions with + score >= thresholds[i] and the last element is 1. + If multiclass, this is a list of such tensors, one for each class. + recall: + tensor where element i is the recall of predictions with + score >= thresholds[i] and the last element is 0. + If multiclass, this is a list of such tensors, one for each class. + thresholds: + Thresholds used for computing precision/recall scores + """ + preds = dim_zero_cat(self.preds) + target = dim_zero_cat(self.target) + if not self.num_classes: + raise ValueError(f"`num_classes` has to be a positive number, but got {self.num_classes}") + return _precision_recall_curve_compute(preds, target, self.num_classes, self.pos_label) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/roc.py b/RE/paddlemetric/src/paddlemetrics/classification/roc.py new file mode 100644 index 00000000..a01a5b94 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/roc.py @@ -0,0 +1,169 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, List, Optional, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.roc import _roc_compute, _roc_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn + + +class ROC(Metric): + """Computes the Receiver Operating Characteristic (ROC). Works for both binary, multiclass and multilabel + problems. In the case of multiclass, the values will be calculated based on a one-vs-the-rest approach. + + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass/multilabel) tensor + with probabilities, where C is the number of classes/labels. + + - ``target`` (long tensor): ``(N, ...)`` or ``(N, C, ...)`` with integer labels + + Args: + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problems is translated to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False.
default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather + + Example (binary case): + >>> from paddlemetrics import ROC + >>> pred = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 1]) + >>> roc = ROC(pos_label=1) + >>> fpr, tpr, thresholds = roc(pred, target) + >>> fpr + tensor([0., 0., 0., 0., 1.]) + >>> tpr + tensor([0.0000, 0.3333, 0.6667, 1.0000, 1.0000]) + >>> thresholds + tensor([4, 3, 2, 1, 0]) + + Example (multiclass case): + >>> pred = B.tensor([[0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05], + ... [0.05, 0.05, 0.05, 0.75]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> roc = ROC(num_classes=4) + >>> fpr, tpr, thresholds = roc(pred, target) + >>> fpr + [tensor([0., 0., 1.]), tensor([0., 0., 1.]), tensor([0.0000, 0.3333, 1.0000]), tensor([0.0000, 0.3333, 1.0000])] + >>> tpr + [tensor([0., 1., 1.]), tensor([0., 1., 1.]), tensor([0., 0., 1.]), tensor([0., 0., 1.])] + >>> thresholds # doctest: +NORMALIZE_WHITESPACE + [tensor([1.7500, 0.7500, 0.0500]), + tensor([1.7500, 0.7500, 0.0500]), + tensor([1.7500, 0.7500, 0.0500]), + tensor([1.7500, 0.7500, 0.0500])] + + Example (multilabel case): + >>> pred = B.tensor([[0.8191, 0.3680, 0.1138], + ... [0.3584, 0.7576, 0.1183], + ... [0.2286, 0.3468, 0.1338], + ... [0.8603, 0.0745, 0.1837]]) + >>> target = B.tensor([[1, 1, 0], [0, 1, 0], [0, 0, 0], [0, 1, 1]]) + >>> roc = ROC(num_classes=3, pos_label=1) + >>> fpr, tpr, thresholds = roc(pred, target) + >>> fpr # doctest: +NORMALIZE_WHITESPACE + [tensor([0.0000, 0.3333, 0.3333, 0.6667, 1.0000]), + tensor([0., 0., 0., 1., 1.]), + tensor([0.0000, 0.0000, 0.3333, 0.6667, 1.0000])] + >>> tpr # doctest: +NORMALIZE_WHITESPACE + [tensor([0., 0., 1., 1., 1.]), + tensor([0.0000, 0.3333, 0.6667, 0.6667, 1.0000]), + tensor([0., 1., 1., 1., 1.])] + >>> thresholds # doctest: +NORMALIZE_WHITESPACE + [tensor([1.8603, 0.8603, 0.8191, 0.3584, 0.2286]), + tensor([1.7576, 0.7576, 0.3680, 0.3468, 0.0745]), + tensor([1.1837, 0.1837, 0.1338, 0.1183, 0.1138])] + """ + + is_differentiable = False + preds: List[Tensor] + target: List[Tensor] + + def __init__( + self, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.num_classes = num_classes + self.pos_label = pos_label + + self.add_state("preds", default=[], dist_reduce_fx=None) + self.add_state("target", default=[], dist_reduce_fx=None) + + rank_zero_warn( + "Metric `ROC` will save all targets and predictions in buffer." + " For large datasets this may lead to large memory footprint." + ) + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. 
+ + Args: + preds: Predictions from model + target: Ground truth values + """ + preds, target, num_classes, pos_label = _roc_update(preds, target, self.num_classes, self.pos_label) + self.preds.append(preds) + self.target.append(target) + self.num_classes = num_classes + self.pos_label = pos_label + + def compute(self) -> Union[Tuple[Tensor, Tensor, Tensor], Tuple[List[Tensor], List[Tensor], List[Tensor]]]: + """Compute the receiver operating characteristic. + + Returns: + 3-element tuple containing + + fpr: + tensor with false positive rates. + If multiclass, this is a list of such tensors, one for each class. + tpr: + tensor with true positive rates. + If multiclass, this is a list of such tensors, one for each class. + thresholds: + thresholds used for computing false- and true positive rates + """ + preds = B.cat(self.preds, dim=0) + target = B.cat(self.target, dim=0) + if not self.num_classes: + raise ValueError(f"`num_classes` has to be a positive number, but got {self.num_classes}") + return _roc_compute(preds, target, self.num_classes, self.pos_label) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/specificity.py b/RE/paddlemetric/src/paddlemetrics/classification/specificity.py new file mode 100644 index 00000000..0ad44268 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/specificity.py @@ -0,0 +1,171 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.classification.stat_scores import StatScores +from paddlemetrics.functional.classification.specificity import _specificity_compute + + +class Specificity(StatScores): + r""" + Computes `Specificity`_: + + .. math:: \text{Specificity} = \frac{\text{TN}}{\text{TN} + \text{FP}} + + Where :math:`\text{TN}` and :math:`\text{FP}` represent the number of true negatives and + false positives respectively. With the use of ``top_k`` parameter, this metric can + generalize to Specificity@K. + + The reduction method (how the specificity scores are aggregated) is controlled by the + ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the + multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + threshold: + Threshold probability value for transforming probability predictions to binary + (0,1) predictions, in the case of binary or multi-label inputs. + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class).
+ - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tn + fp``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + top_k: + Number of highest probability entries for each sample to convert to 1s - relevant + only for inputs with probability predictions. If this parameter is set for multi-label + inputs, it will take precedence over ``threshold``. For (multi-dim) multi-class inputs, + this parameter defaults to 1. + + Should be left unset (``None``) for inputs with label predictions. + + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather. + + Raises: + ValueError: + If ``average`` is none of ``"micro"``, ``"macro"``, ``"weighted"``, ``"samples"``, ``"none"``, ``None``. 
+ + Example: + >>> from paddlemetrics import Specificity + >>> preds = B.tensor([2, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> specificity = Specificity(average='macro', num_classes=3) + >>> specificity(preds, target) + tensor(0.6111) + >>> specificity = Specificity(average='micro') + >>> specificity(preds, target) + tensor(0.6250) + + """ + is_differentiable = False + + def __init__( + self, + num_classes: Optional[int] = None, + threshold: float = 0.5, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + allowed_average = ["micro", "macro", "weighted", "samples", "none", None] + if average not in allowed_average: + raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") + + super().__init__( + reduce="macro" if average in ["weighted", "none", None] else average, + mdmc_reduce=mdmc_average, + threshold=threshold, + top_k=top_k, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.average = average + + def compute(self) -> Tensor: + """Computes the specificity score based on inputs passed in to ``update`` previously. + + Return: + The shape of the returned tensor depends on the ``average`` parameter + + - If ``average in ['micro', 'macro', 'weighted', 'samples']``, a one-element tensor will be returned + - If ``average in ['none', None]``, the shape will be ``(C,)``, where ``C`` stands for the number + of classes + """ + tp, fp, tn, fn = self._get_final_stats() + return _specificity_compute(tp, fp, tn, fn, self.average, self.mdmc_reduce) diff --git a/RE/paddlemetric/src/paddlemetrics/classification/stat_scores.py b/RE/paddlemetric/src/paddlemetrics/classification/stat_scores.py new file mode 100644 index 00000000..ec099c86 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/classification/stat_scores.py @@ -0,0 +1,267 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional, Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.stat_scores import _stat_scores_compute, _stat_scores_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities.enums import AverageMethod, MDMCAverageMethod + + +class StatScores(Metric): + r"""Computes the number of true positives, false positives, true negatives, false negatives. + Related to `Type I and Type II errors`_ + and the `confusion matrix`_. 
+ + The reduction method (how the statistics are aggregated) is controlled by the + ``reduce`` parameter, and additionally by the ``mdmc_reduce`` parameter in the + multi-dimensional multi-class case. + + Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + + reduce: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Counts the statistics by summing over all [sample, class] + combinations (globally). Each statistic is represented by a single integer. + - ``'macro'``: Counts the statistics for each class separately (over all samples). + Each statistic is represented by a ``(C,)`` tensor. Requires ``num_classes`` + to be set. + - ``'samples'``: Counts the statistics for each sample separately (over all classes). + Each statistic is represented by a ``(N, )`` 1d tensor. + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_reduce``. + + num_classes: + Number of classes. Necessary for (multi-dimensional) multi-class or multi-label data. + + ignore_index: + Specify a class (label) to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and + ``reduce='macro'``, the class statistics for the ignored class will all be returned + as ``-1``. + + mdmc_reduce: + Defines how the multi-dimensional multi-class inputs are handeled. Should be + one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class (see :ref:`references/modules:input types` for the definition of input types). + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then the outputs are concatenated together. In each + sample the extra axes ``...`` are flattened to become the sub-sample axis, and + statistics for each sample are computed by treating the sub-sample axis as the + ``N`` axis for that sample. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs are + flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``reduce`` parameter applies as usual. + + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. 
When ``None``, DDP + will be used to perform the allgather. + + Raises: + ValueError: + If ``reduce`` is none of ``"micro"``, ``"macro"`` or ``"samples"``. + ValueError: + If ``mdmc_reduce`` is none of ``None``, ``"samplewise"``, ``"global"``. + ValueError: + If ``reduce`` is set to ``"macro"`` and ``num_classes`` is not provided. + ValueError: + If ``num_classes`` is set + and ``ignore_index`` is not in the range ``0`` <= ``ignore_index`` < ``num_classes``. + + Example: + >>> from paddlemetrics.classification import StatScores + >>> preds = B.tensor([1, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> stat_scores = StatScores(reduce='macro', num_classes=3) + >>> stat_scores(preds, target) + tensor([[0, 1, 2, 1, 1], + [1, 1, 1, 1, 2], + [1, 0, 3, 0, 1]]) + >>> stat_scores = StatScores(reduce='micro') + >>> stat_scores(preds, target) + tensor([2, 2, 6, 2, 4]) + + """ + is_differentiable = False + # TODO: canot be used because if scripting + # tp: Union[Tensor, List[Tensor]] + # fp: Union[Tensor, List[Tensor]] + # tn: Union[Tensor, List[Tensor]] + # fn: Union[Tensor, List[Tensor]] + + def __init__( + self, + threshold: float = 0.5, + top_k: Optional[int] = None, + reduce: str = "micro", + num_classes: Optional[int] = None, + ignore_index: Optional[int] = None, + mdmc_reduce: Optional[str] = None, + multiclass: Optional[bool] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.reduce = reduce + self.mdmc_reduce = mdmc_reduce + self.num_classes = num_classes + self.threshold = threshold + self.multiclass = multiclass + self.ignore_index = ignore_index + self.top_k = top_k + + if reduce not in ["micro", "macro", "samples"]: + raise ValueError(f"The `reduce` {reduce} is not valid.") + + if mdmc_reduce not in [None, "samplewise", "global"]: + raise ValueError(f"The `mdmc_reduce` {mdmc_reduce} is not valid.") + + if reduce == "macro" and (not num_classes or num_classes < 1): + raise ValueError("When you set `reduce` as 'macro', you have to provide the number of classes.") + + if num_classes and ignore_index is not None and (not 0 <= ignore_index < num_classes or num_classes == 1): + raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {num_classes} classes") + + default: Callable = lambda: [] + reduce_fn: Optional[str] = None + if mdmc_reduce != "samplewise" and reduce != "samples": + if reduce == "micro": + zeros_shape = [] + elif reduce == "macro": + zeros_shape = [num_classes] + else: + raise ValueError(f'Wrong reduce="{reduce}"') + default = lambda: B.zeros(zeros_shape, dtype=B.long) + reduce_fn = "sum" + + for s in ("tp", "fp", "tn", "fn"): + self.add_state(s, default=default(), dist_reduce_fx=reduce_fn) + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. See + :ref:`references/modules:input types` for more information on input + types. 
+ + Args: + preds: Predictions from model (probabilities, logits or labels) + target: Ground truth values + """ + + tp, fp, tn, fn = _stat_scores_update( + preds, + target, + reduce=self.reduce, + mdmc_reduce=self.mdmc_reduce, + threshold=self.threshold, + num_classes=self.num_classes, + top_k=self.top_k, + multiclass=self.multiclass, + ignore_index=self.ignore_index, + ) + + # Update states + if self.reduce != AverageMethod.SAMPLES and self.mdmc_reduce != MDMCAverageMethod.SAMPLEWISE: + self.tp += tp + self.fp += fp + self.tn += tn + self.fn += fn + else: + self.tp.append(tp) + self.fp.append(fp) + self.tn.append(tn) + self.fn.append(fn) + + def _get_final_stats(self) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + """Performs concatenation on the stat scores if neccesary, before passing them to a compute function.""" + tp = B.cat(self.tp) if isinstance(self.tp, list) else self.tp + fp = B.cat(self.fp) if isinstance(self.fp, list) else self.fp + tn = B.cat(self.tn) if isinstance(self.tn, list) else self.tn + fn = B.cat(self.fn) if isinstance(self.fn, list) else self.fn + return tp, fp, tn, fn + + def compute(self) -> Tensor: + """Computes the stat scores based on inputs passed in to ``update`` previously. + + Return: + The metric returns a tensor of shape ``(..., 5)``, where the last dimension corresponds + to ``[tp, fp, tn, fn, sup]`` (``sup`` stands for support and equals ``tp + fn``). The + shape depends on the ``reduce`` and ``mdmc_reduce`` (in case of multi-dimensional + multi-class data) parameters: + + - If the data is not multi-dimensional multi-class, then + + - If ``reduce='micro'``, the shape will be ``(5, )`` + - If ``reduce='macro'``, the shape will be ``(C, 5)``, + where ``C`` stands for the number of classes + - If ``reduce='samples'``, the shape will be ``(N, 5)``, where ``N`` stands for + the number of samples + + - If the data is multi-dimensional multi-class and ``mdmc_reduce='global'``, then + + - If ``reduce='micro'``, the shape will be ``(5, )`` + - If ``reduce='macro'``, the shape will be ``(C, 5)`` + - If ``reduce='samples'``, the shape will be ``(N*X, 5)``, where ``X`` stands for + the product of sizes of all "extra" dimensions of the data (i.e. all dimensions + except for ``C`` and ``N``) + + - If the data is multi-dimensional multi-class and ``mdmc_reduce='samplewise'``, then + + - If ``reduce='micro'``, the shape will be ``(N, 5)`` + - If ``reduce='macro'``, the shape will be ``(N, C, 5)`` + - If ``reduce='samples'``, the shape will be ``(N, X, 5)`` + """ + tp, fp, tn, fn = self._get_final_stats() + return _stat_scores_compute(tp, fp, tn, fn) diff --git a/RE/paddlemetric/src/paddlemetrics/collections.py b/RE/paddlemetric/src/paddlemetrics/collections.py new file mode 100644 index 00000000..3b03856e --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/collections.py @@ -0,0 +1,239 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
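# --- Illustrative usage sketch (editor's note, not part of this changeset) ---
# The shape rules documented in ``StatScores.compute`` above can be exercised
# directly. This assumes the backend alias ``B`` (paddleext.torchapi) used
# throughout paddlemetrics; the sample values are made up for illustration.
import paddleext.torchapi as B
from paddlemetrics.classification import StatScores

# Multi-dimensional multi-class label inputs: N=2 samples, X=4 sub-samples,
# C=3 classes.
preds = B.tensor([[0, 1, 2, 2], [1, 1, 0, 2]])
target = B.tensor([[0, 2, 2, 1], [1, 0, 0, 2]])

stat_scores = StatScores(reduce="macro", mdmc_reduce="samplewise", num_classes=3)
out = stat_scores(preds, target)
# Per the compute() docstring, reduce='macro' with mdmc_reduce='samplewise'
# yields one (C, 5) block of [tp, fp, tn, fn, support] per sample, i.e. an
# (N, C, 5) tensor overall.
assert tuple(out.shape) == (2, 3, 5)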
+ +from collections import OrderedDict +from copy import deepcopy +from typing import Any, Dict, Hashable, Iterable, Optional, Sequence, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import nn + +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn + + +class MetricCollection(nn.ModuleDict): + """MetricCollection class can be used to chain metrics that have the same call pattern into one single class. + + Args: + metrics: One of the following + + * list or tuple (sequence): if metrics are passed in as a list or tuple, will use the metrics class name + as key for output dict. Therefore, two metrics of the same class cannot be chained this way. + + * arguments: similar to passing in as a list, metrics passed in as arguments will use their metric + class name as key for the output dict. + + * dict: if metrics are passed in as a dict, will use each key in the dict as key for output dict. + Use this format if you want to chain together multiple of the same metric with different parameters. + Note that the keys in the output dict will be sorted alphabetically. + + prefix: a string to append in front of the keys of the output dict + + postfix: a string to append after the keys of the output dict + + Raises: + ValueError: + If one of the elements of ``metrics`` is not an instance of ``pl.metrics.Metric``. + ValueError: + If two elements in ``metrics`` have the same ``name``. + ValueError: + If ``metrics`` is not a ``list``, ``tuple`` or a ``dict``. + ValueError: + If ``metrics`` is ``dict`` and additional_metrics are passed in. + ValueError: + If ``prefix`` is set and it is not a string. + ValueError: + If ``postfix`` is set and it is not a string. + + Example (input as list): + >>> import torchapi as B + >>> from pprint import pprint + >>> from paddlemetrics import MetricCollection, Accuracy, Precision, Recall + >>> target = B.tensor([0, 2, 0, 2, 0, 1, 0, 2]) + >>> preds = B.tensor([2, 1, 2, 0, 1, 2, 2, 2]) + >>> metrics = MetricCollection([Accuracy(), + ... Precision(num_classes=3, average='macro'), + ... Recall(num_classes=3, average='macro')]) + >>> metrics(preds, target) + {'Accuracy': tensor(0.1250), 'Precision': tensor(0.0667), 'Recall': tensor(0.1111)} + + Example (input as arguments): + >>> metrics = MetricCollection(Accuracy(), Precision(num_classes=3, average='macro'), + ... Recall(num_classes=3, average='macro')) + >>> metrics(preds, target) + {'Accuracy': tensor(0.1250), 'Precision': tensor(0.0667), 'Recall': tensor(0.1111)} + + Example (input as dict): + >>> metrics = MetricCollection({'micro_recall': Recall(num_classes=3, average='micro'), + ... 'macro_recall': Recall(num_classes=3, average='macro')}) + >>> same_metric = metrics.clone() + >>> pprint(metrics(preds, target)) + {'macro_recall': tensor(0.1111), 'micro_recall': tensor(0.1250)} + >>> pprint(same_metric(preds, target)) + {'macro_recall': tensor(0.1111), 'micro_recall': tensor(0.1250)} + >>> metrics.persistent() + """ + + def __init__( + self, + metrics: Union[Metric, Sequence[Metric], Dict[str, Metric]], + *additional_metrics: Metric, + prefix: Optional[str] = None, + postfix: Optional[str] = None, + ) -> None: + super().__init__() + + self._modules = self._sub_layers + + self.add_metrics(metrics, *additional_metrics) + + self.prefix = self._check_arg(prefix, "prefix") + self.postfix = self._check_arg(postfix, "postfix") + + def forward(self, *args: Any, **kwargs: Any) -> Dict[str, Any]: + """Iteratively call forward for each metric. 
+ + Positional arguments (args) will be passed to every metric in the collection, while keyword arguments (kwargs) + will be filtered based on the signature of the individual metric. + """ + return {k: m(*args, **m._filter_kwargs(**kwargs)) for k, m in self.items()} + + def update(self, *args: Any, **kwargs: Any) -> None: + """Iteratively call update for each metric. + + Positional arguments (args) will be passed to every metric in the collection, while keyword arguments (kwargs) + will be filtered based on the signature of the individual metric. + """ + for _, m in self.items(keep_base=True): + m_kwargs = m._filter_kwargs(**kwargs) + m.update(*args, **m_kwargs) + + def compute(self) -> Dict[str, Any]: + return {k: m.compute() for k, m in self.items()} + + def reset(self) -> None: + """Iteratively call reset for each metric.""" + for _, m in self.items(keep_base=True): + m.reset() + + def clone(self, prefix: Optional[str] = None, postfix: Optional[str] = None) -> "MetricCollection": + """Make a copy of the metric collection + Args: + prefix: a string to append in front of the metric keys + postfix: a string to append after the keys of the output dict + + """ + mc = deepcopy(self) + if prefix: + mc.prefix = self._check_arg(prefix, "prefix") + if postfix: + mc.postfix = self._check_arg(postfix, "postfix") + return mc + + def persistent(self, mode: bool = True) -> None: + """Method for post-init to change if metric states should be saved to its state_dict.""" + for _, m in self.items(keep_base=True): + m.persistent(mode) + + def add_metrics( + self, metrics: Union[Metric, Sequence[Metric], Dict[str, Metric]], *additional_metrics: Metric + ) -> None: + """Add new metrics to Metric Collection.""" + if isinstance(metrics, Metric): + # set compatible with original type expectations + metrics = [metrics] + if isinstance(metrics, Sequence): + # prepare for optional additions + metrics = list(metrics) + remain: list = [] + for m in additional_metrics: + (metrics if isinstance(m, Metric) else remain).append(m) + + if remain: + rank_zero_warn( + f"You have passes extra arguments {remain} which are not `Metric` so they will be ignored." + ) + elif additional_metrics: + raise ValueError( + f"You have passes extra arguments {additional_metrics} which are not compatible" + f" with first passed dictionary {metrics} so they will be ignored." 
+ ) + + if isinstance(metrics, dict): + # Check all values are metrics + # Make sure that metrics are added in deterministic order + for name in sorted(metrics.keys()): + metric = metrics[name] + if not isinstance(metric, Metric): + raise ValueError( + f"Value {metric} belonging to key {name} is not an instance of `pl.metrics.Metric`" + ) + self[name] = metric + elif isinstance(metrics, Sequence): + for metric in metrics: + if not isinstance(metric, Metric): + raise ValueError(f"Input {metric} to `MetricCollection` is not a instance of `pl.metrics.Metric`") + name = metric.__class__.__name__ + if name in self: + raise ValueError(f"Encountered two metrics both named {name}") + self[name] = metric + else: + raise ValueError("Unknown input to MetricCollection.") + + def _set_name(self, base: str) -> str: + name = base if self.prefix is None else self.prefix + base + name = name if self.postfix is None else name + self.postfix + return name + + def _to_renamed_ordered_dict(self) -> OrderedDict: + od = OrderedDict() + for k, v in self._modules.items(): + od[self._set_name(k)] = v + return od + + def keys(self, keep_base: bool = False) -> Iterable[Hashable]: + r"""Return an iterable of the ModuleDict key. + Args: + keep_base: Whether to add prefix/postfix on the items collection. + """ + if keep_base: + return self._modules.keys() + return self._to_renamed_ordered_dict().keys() + + def items(self, keep_base: bool = False) -> Iterable[Tuple[str, nn.Module]]: + r"""Return an iterable of the ModuleDict key/value pairs. + Args: + keep_base: Whether to add prefix/postfix on the items collection. + """ + if keep_base: + return self._modules.items() + return self._to_renamed_ordered_dict().items() + + @staticmethod + def _check_arg(arg: Optional[str], name: str) -> Optional[str]: + if arg is None or isinstance(arg, str): + return arg + raise ValueError(f"Expected input `{name}` to be a string, but got {type(arg)}") + + def __repr__(self) -> str: + repr_str = super().__repr__()[:-2] + if self.prefix: + repr_str += f",\n prefix={self.prefix}{',' if self.postfix else ''}" + if self.postfix: + repr_str += f"{',' if not self.prefix else ''}\n postfix={self.postfix}" + return repr_str + "\n)" + + def to(self, device): + pass \ No newline at end of file diff --git a/RE/paddlemetric/src/paddlemetrics/functional/__init__.py b/RE/paddlemetric/src/paddlemetrics/functional/__init__.py new file mode 100644 index 00000000..365d93c9 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/__init__.py @@ -0,0 +1,138 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
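# --- Illustrative usage sketch (editor's note, not part of this changeset) ---
# How the prefix/postfix renaming and ``clone`` of ``MetricCollection`` above
# interact. Assumes the backend alias ``B`` used throughout the package; the
# inputs are toy label tensors.
import paddleext.torchapi as B
from paddlemetrics import Accuracy, MetricCollection, Recall

metrics = MetricCollection(
    {"acc": Accuracy(), "rec": Recall(num_classes=3, average="macro")},
    prefix="train_",
)
preds = B.tensor([0, 2, 1, 0])
target = B.tensor([0, 1, 2, 0])
out = metrics(preds, target)                # keys come back renamed: 'train_acc', 'train_rec'
val_metrics = metrics.clone(prefix="val_")  # independent copy, re-prefixed to 'val_...'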
+from paddlemetrics.functional.audio.pesq import pesq +from paddlemetrics.functional.audio.pit import pit, pit_permutate +from paddlemetrics.functional.audio.si_sdr import si_sdr +from paddlemetrics.functional.audio.si_snr import si_snr +from paddlemetrics.functional.audio.snr import snr +from paddlemetrics.functional.audio.stoi import stoi +from paddlemetrics.functional.classification.accuracy import accuracy +from paddlemetrics.functional.classification.auc import auc +from paddlemetrics.functional.classification.auroc import auroc +from paddlemetrics.functional.classification.average_precision import average_precision +from paddlemetrics.functional.classification.calibration_error import calibration_error +from paddlemetrics.functional.classification.cohen_kappa import cohen_kappa +from paddlemetrics.functional.classification.confusion_matrix import confusion_matrix +from paddlemetrics.functional.classification.dice import dice_score +from paddlemetrics.functional.classification.f_beta import f1, fbeta +from paddlemetrics.functional.classification.hamming_distance import hamming_distance +from paddlemetrics.functional.classification.hinge import hinge +from paddlemetrics.functional.classification.iou import iou +from paddlemetrics.functional.classification.kl_divergence import kl_divergence +from paddlemetrics.functional.classification.matthews_corrcoef import matthews_corrcoef +from paddlemetrics.functional.classification.precision_recall import precision, precision_recall, recall +from paddlemetrics.functional.classification.precision_recall_curve import precision_recall_curve +from paddlemetrics.functional.classification.roc import roc +from paddlemetrics.functional.classification.specificity import specificity +from paddlemetrics.functional.classification.stat_scores import stat_scores +from paddlemetrics.functional.image.gradients import image_gradients +from paddlemetrics.functional.image.psnr import psnr +from paddlemetrics.functional.image.ssim import ssim +from paddlemetrics.functional.pairwise.cosine import pairwise_cosine_similarity +from paddlemetrics.functional.pairwise.euclidean import pairwise_euclidean_distance +from paddlemetrics.functional.pairwise.linear import pairwise_linear_similarity +from paddlemetrics.functional.pairwise.manhatten import pairwise_manhatten_distance +from paddlemetrics.functional.regression.cosine_similarity import cosine_similarity +from paddlemetrics.functional.regression.explained_variance import explained_variance +from paddlemetrics.functional.regression.mean_absolute_error import mean_absolute_error +from paddlemetrics.functional.regression.mean_absolute_percentage_error import mean_absolute_percentage_error +from paddlemetrics.functional.regression.mean_squared_error import mean_squared_error +from paddlemetrics.functional.regression.mean_squared_log_error import mean_squared_log_error +from paddlemetrics.functional.regression.pearson import pearson_corrcoef +from paddlemetrics.functional.regression.r2 import r2_score +from paddlemetrics.functional.regression.spearman import spearman_corrcoef +from paddlemetrics.functional.regression.symmetric_mean_absolute_percentage_error import ( + symmetric_mean_absolute_percentage_error, +) +from paddlemetrics.functional.regression.tweedie_deviance import tweedie_deviance_score +from paddlemetrics.functional.retrieval.average_precision import retrieval_average_precision +from paddlemetrics.functional.retrieval.fall_out import retrieval_fall_out +from paddlemetrics.functional.retrieval.hit_rate import 
retrieval_hit_rate +from paddlemetrics.functional.retrieval.ndcg import retrieval_normalized_dcg +from paddlemetrics.functional.retrieval.precision import retrieval_precision +from paddlemetrics.functional.retrieval.r_precision import retrieval_r_precision +from paddlemetrics.functional.retrieval.recall import retrieval_recall +from paddlemetrics.functional.retrieval.reciprocal_rank import retrieval_reciprocal_rank +from paddlemetrics.functional.self_supervised import embedding_similarity +#from paddlemetrics.functional.text.bert import bert_score +from paddlemetrics.functional.text.bleu import bleu_score +from paddlemetrics.functional.text.rouge import rouge_score +from paddlemetrics.functional.text.sacre_bleu import sacre_bleu_score +from paddlemetrics.functional.text.wer import wer + +__all__ = [ + "accuracy", + "auc", + "auroc", + "average_precision", +# "bert_score", + "bleu_score", + "calibration_error", + "cohen_kappa", + "confusion_matrix", + "cosine_similarity", + "tweedie_deviance_score", + "dice_score", + "embedding_similarity", + "explained_variance", + "f1", + "fbeta", + "hamming_distance", + "hinge", + "image_gradients", + "iou", + "kl_divergence", + "kldivergence", + "matthews_corrcoef", + "mean_absolute_error", + "mean_absolute_percentage_error", + "mean_squared_error", + "mean_squared_log_error", + "pairwise_cosine_similarity", + "pairwise_euclidean_distance", + "pairwise_linear_similarity", + "pairwise_manhatten_distance", + "pearson_corrcoef", + "pesq", + "pit", + "pit_permutate", + "precision", + "precision_recall", + "precision_recall_curve", + "psnr", + "r2_score", + "r2score", + "recall", + "retrieval_average_precision", + "retrieval_fall_out", + "retrieval_hit_rate", + "retrieval_normalized_dcg", + "retrieval_precision", + "retrieval_r_precision", + "retrieval_recall", + "retrieval_reciprocal_rank", + "roc", + "rouge_score", + "sacre_bleu_score", + "si_sdr", + "si_snr", + "snr", + "spearman_corrcoef", + "specificity", + "ssim", + "stat_scores", + "stoi", + "symmetric_mean_absolute_percentage_error", + "wer", +] diff --git a/RE/paddlemetric/src/paddlemetrics/functional/audio/__init__.py b/RE/paddlemetric/src/paddlemetrics/functional/audio/__init__.py new file mode 100644 index 00000000..a7e7d89c --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/audio/__init__.py @@ -0,0 +1,19 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
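# --- Illustrative usage sketch (editor's note, not part of this changeset) ---
# The functional entry points re-exported above are stateless: each call takes
# predictions and targets and returns the metric value directly. Assumes the
# backend alias ``B`` used throughout the package.
import paddleext.torchapi as B
from paddlemetrics.functional import accuracy, stat_scores

preds = B.tensor([0, 2, 1, 3])
target = B.tensor([0, 1, 2, 3])
acc = accuracy(preds, target)                        # tensor(0.5000), as in the accuracy docstring example
scores = stat_scores(preds, target, reduce="micro")  # [tp, fp, tn, fn, support]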
+from paddlemetrics.functional.audio.pesq import pesq # noqa: F401 +from paddlemetrics.functional.audio.pit import pit, pit_permutate # noqa: F401 +from paddlemetrics.functional.audio.si_sdr import si_sdr # noqa: F401 +from paddlemetrics.functional.audio.si_snr import si_snr # noqa: F401 +from paddlemetrics.functional.audio.snr import snr # noqa: F401 +from paddlemetrics.functional.audio.stoi import stoi # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/functional/audio/pesq.py b/RE/paddlemetric/src/paddlemetrics/functional/audio/pesq.py new file mode 100644 index 00000000..ab81723d --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/audio/pesq.py @@ -0,0 +1,100 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np + +from paddlemetrics.utilities.imports import _PESQ_AVAILABLE + +if _PESQ_AVAILABLE: + import pesq as pesq_backend +else: + pesq_backend = None +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def pesq(preds: Tensor, target: Tensor, fs: int, mode: str, keep_same_device: bool = False) -> Tensor: + r"""PESQ (Perceptual Evaluation of Speech Quality) + + This is a wrapper for the ``pesq`` package [1]. Note that input will be moved to `cpu` + to perform the metric calculation. + + .. note:: using this metrics requires you to have ``pesq`` install. Either install as ``pip install + paddlemetrics[audio]`` or ``pip install pesq`` + + Args: + preds: + shape ``[...,time]`` + target: + shape ``[...,time]`` + fs: + sampling frequency, should be 16000 or 8000 (Hz) + mode: + 'wb' (wide-band) or 'nb' (narrow-band) + keep_same_device: + whether to move the pesq value to the device of preds + + Returns: + pesq value of shape [...] + + Raises: + ValueError: + If ``peqs`` package is not installed + ValueError: + If ``fs`` is not either ``8000`` or ``16000`` + ValueError: + If ``mode`` is not either ``"wb"`` or ``"nb"`` + + Example: + >>> from paddlemetrics.functional.audio import pesq + >>> import torchapi as B + >>> g = B.manual_seed(1) + >>> preds = B.randn(8000) + >>> target = B.randn(8000) + >>> pesq(preds, target, 8000, 'nb') + tensor(2.2076) + >>> pesq(preds, target, 16000, 'wb') + tensor(1.7359) + + References: + [1] https://github.com/ludlows/python-pesq + """ + if not _PESQ_AVAILABLE: + raise ValueError( + "PESQ metric requires that pesq is installed." 
+ "Either install as `pip install paddlemetrics[audio]` or `pip install pesq`" + ) + if fs not in (8000, 16000): + raise ValueError(f"Expected argument `fs` to either be 8000 or 16000 but got {fs}") + if mode not in ("wb", "nb"): + raise ValueError(f"Expected argument `mode` to either be 'wb' or 'nb' but got {mode}") + _check_same_shape(preds, target) + + if preds.ndim == 1: + pesq_val_np = pesq_backend.pesq(fs, target.detach().cpu().numpy(), preds.detach().cpu().numpy(), mode) + pesq_val = B.tensor(pesq_val_np) + else: + preds_np = preds.reshape(-1, preds.shape[-1]).detach().cpu().numpy() + target_np = target.reshape(-1, preds.shape[-1]).detach().cpu().numpy() + pesq_val_np = np.empty(shape=(preds_np.shape[0])) + for b in range(preds_np.shape[0]): + pesq_val_np[b] = pesq_backend.pesq(fs, target_np[b, :], preds_np[b, :], mode) + pesq_val = B.from_numpy(pesq_val_np) + pesq_val = pesq_val.reshape(preds.shape[:-1]) + + if keep_same_device: + pesq_val = pesq_val.to(preds.device) + + return pesq_val diff --git a/RE/paddlemetric/src/paddlemetrics/functional/audio/pit.py b/RE/paddlemetric/src/paddlemetrics/functional/audio/pit.py new file mode 100644 index 00000000..3ca729a2 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/audio/pit.py @@ -0,0 +1,206 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import warnings +from itertools import permutations +from typing import Any, Callable, Dict, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape +from paddlemetrics.utilities.imports import _SCIPY_AVAILABLE + +# _ps_dict: cache of permutations +# it's necessary to cache it, otherwise it will consume a large amount of time +_ps_dict: dict = {} # _ps_dict[str(spk_num)+str(device)] = permutations + + +def _find_best_perm_by_linear_sum_assignment( + metric_mtx: B.Tensor, + eval_func: Union[B.min, B.max], +) -> Tuple[Tensor, Tensor]: + """Solves the linear sum assignment problem using scipy, and returns the best metric values and the + corresponding permutations. + + Args: + metric_mtx: + the metric matrix, shape [batch_size, spk_num, spk_num] + eval_func: + the function to reduce the metric values of different the permutations + + Returns: + best_metric: + shape [batch] + best_perm: + shape [batch, spk] + """ + from scipy.optimize import linear_sum_assignment + + mmtx = metric_mtx.detach().cpu() + best_perm = B.tensor([linear_sum_assignment(pwm, eval_func == B.max)[1] for pwm in mmtx]) + best_perm = best_perm.to(metric_mtx.device) + best_metric = B.gather(metric_mtx, 2, best_perm[:, :, None]).mean([-1, -2]) + return best_metric, best_perm # shape [batch], shape [batch, spk] + + +def _find_best_perm_by_exhuastive_method( + metric_mtx: B.Tensor, + eval_func: Union[B.min, B.max], +) -> Tuple[Tensor, Tensor]: + """Solves the linear sum assignment problem using exhuastive method, i.e. 
exhuastively calculates the metric + values of all possible permutations, and returns the best metric values and the corresponding permutations. + + Args: + metric_mtx: + the metric matrix, shape [batch_size, spk_num, spk_num] + eval_func: + the function to reduce the metric values of different the permutations + + Returns: + best_metric: + shape [batch] + best_perm: + shape [batch, spk] + """ + # create/read/cache the permutations and its indexes + # reading from cache would be much faster than creating in CPU then moving to GPU + batch_size, spk_num = metric_mtx.shape[:2] + key = str(spk_num) + str(metric_mtx.device) + if key not in _ps_dict: + # ps: all the permutations, shape [spk_num, perm_num] + # ps: In i-th permutation, the predcition corresponds to the j-th target is ps[j,i] + ps = B.tensor(list(permutations(range(spk_num))), device=metric_mtx.device).T + _ps_dict[key] = ps + else: + ps = _ps_dict[key] # all the permutations, shape [spk_num, perm_num] + + # find the metric of each permutation + perm_num = ps.shape[-1] + # shape [batch_size, spk_num, perm_num] + bps = ps[None, ...].expand(batch_size, spk_num, perm_num) + # shape [batch_size, spk_num, perm_num] + metric_of_ps_details = B.gather(metric_mtx, 2, bps) + # shape [batch_size, perm_num] + metric_of_ps = metric_of_ps_details.mean(dim=1) + + # find the best metric and best permutation + best_metric, best_indexes = eval_func(metric_of_ps, dim=1) + best_indexes = best_indexes.detach() + best_perm = ps.T[best_indexes, :] + return best_metric, best_perm # shape [batch], shape [batch, spk] + + +def pit( + preds: B.Tensor, target: B.Tensor, metric_func: Callable, eval_func: str = "max", **kwargs: Dict[str, Any] +) -> Tuple[Tensor, Tensor]: + """Permutation invariant training (PIT). The PIT implements the famous Permutation Invariant Training method. + + [1] in speech separation field in order to calculate audio metrics in a permutation invariant way. + + Args: + preds: + shape [batch, spk, ...] + target: + shape [batch, spk, ...] + metric_func: + a metric function accept a batch of target and estimate, + i.e. metric_func(preds[:, i, ...], target[:, j, ...]), and returns a batch of metric tensors [batch] + eval_func: + the function to find the best permutation, can be 'min' or 'max', + i.e. the smaller the better or the larger the better. 
+ kwargs: + additional args for metric_func + + Returns: + best_metric of shape [batch], + best_perm of shape [batch] + + Example: + >>> from paddlemetrics.functional.audio import si_sdr + >>> # [batch, spk, time] + >>> preds = B.tensor([[[-0.0579, 0.3560, -0.9604], [-0.1719, 0.3205, 0.2951]]]) + >>> target = B.tensor([[[ 1.0958, -0.1648, 0.5228], [-0.4100, 1.1942, -0.5103]]]) + >>> best_metric, best_perm = pit(preds, target, si_sdr, 'max') + >>> best_metric + tensor([-5.1091]) + >>> best_perm + tensor([[0, 1]]) + >>> pit_permutate(preds, best_perm) + tensor([[[-0.0579, 0.3560, -0.9604], + [-0.1719, 0.3205, 0.2951]]]) + + Reference: + [1] `Permutation Invariant Training of Deep Models`_ + """ + _check_same_shape(preds, target) + if eval_func not in ["max", "min"]: + raise ValueError(f'eval_func can only be "max" or "min" but got {eval_func}') + if target.ndim < 2: + raise ValueError(f"Inputs must be of shape [batch, spk, ...], got {target.shape} and {preds.shape} instead") + + # calculate the metric matrix + batch_size, spk_num = target.shape[0:2] + metric_mtx = None + for target_idx in range(spk_num): # we have spk_num speeches in target in each sample + for preds_idx in range(spk_num): # we have spk_num speeches in preds in each sample + if metric_mtx is not None: + metric_mtx[:, target_idx, preds_idx] = metric_func( + preds[:, preds_idx, ...], target[:, target_idx, ...], **kwargs + ) + else: + first_ele = metric_func(preds[:, preds_idx, ...], target[:, target_idx, ...], **kwargs) + metric_mtx = B.empty((batch_size, spk_num, spk_num), dtype=first_ele.dtype, device=first_ele.device) + metric_mtx[:, target_idx, preds_idx] = first_ele + + # find best + op = B.max if eval_func == "max" else B.min + if spk_num < 3 or not _SCIPY_AVAILABLE: + if spk_num >= 3 and not _SCIPY_AVAILABLE: + warnings.warn( + f"In pit metric for speaker-num {spk_num}>3, we recommend installing scipy for better performance" + ) + + best_metric, best_perm = _find_best_perm_by_exhuastive_method(metric_mtx, op) + else: + best_metric, best_perm = _find_best_perm_by_linear_sum_assignment(metric_mtx, op) + + return best_metric, best_perm + + +def pit_permutate(preds: Tensor, perm: Tensor) -> Tensor: + """permutate estimate according to perm. + + Args: + preds (Tensor): the estimates you want to permutate, shape [batch, spk, ...] + perm (Tensor): the permutation returned from pit, shape [batch, spk] + + Returns: + Tensor: the permutated version of estimate + + Example: + >>> from paddlemetrics.functional.audio import si_sdr + >>> # [batch, spk, time] + >>> preds = B.tensor([[[-0.0579, 0.3560, -0.9604], [-0.1719, 0.3205, 0.2951]]]) + >>> target = B.tensor([[[ 1.0958, -0.1648, 0.5228], [-0.4100, 1.1942, -0.5103]]]) + >>> best_metric, best_perm = pit(preds, target, si_sdr, 'max') + >>> best_metric + tensor([-5.1091]) + >>> best_perm + tensor([[0, 1]]) + >>> pit_permutate(preds, best_perm) + tensor([[[-0.0579, 0.3560, -0.9604], + [-0.1719, 0.3205, 0.2951]]]) + """ + preds_pmted = B.stack([B.index_select(pred, 0, p) for pred, p in zip(preds, perm)]) + return preds_pmted diff --git a/RE/paddlemetric/src/paddlemetrics/functional/audio/si_sdr.py b/RE/paddlemetric/src/paddlemetrics/functional/audio/si_sdr.py new file mode 100644 index 00000000..66eb9e3a --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/audio/si_sdr.py @@ -0,0 +1,64 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def si_sdr(preds: Tensor, target: Tensor, zero_mean: bool = False) -> Tensor: + """Calculates Scale-invariant signal-to-distortion ratio (SI-SDR) metric. The SI-SDR value is in general + considered an overall measure of how good a source sound. + + Args: + preds: + shape ``[...,time]`` + target: + shape ``[...,time]`` + zero_mean: + If to zero mean target and preds or not + + Returns: + si-sdr value of shape [...] + + Example: + >>> from paddlemetrics.functional.audio import si_sdr + >>> target = B.tensor([3.0, -0.5, 2.0, 7.0]) + >>> preds = B.tensor([2.5, 0.0, 2.0, 8.0]) + >>> si_sdr_val = si_sdr(preds, target) + >>> si_sdr_val + tensor(18.4030) + + References: + [1] Le Roux, Jonathan, et al. "SDR half-baked or well done." IEEE International Conference on Acoustics, Speech + and Signal Processing (ICASSP) 2019. + """ + _check_same_shape(preds, target) + EPS = B.finfo(preds.dtype).eps + + if zero_mean: + target = target - B.mean(target, dim=-1, keepdim=True) + preds = preds - B.mean(preds, dim=-1, keepdim=True) + + alpha = (B.sum(preds * target, dim=-1, keepdim=True) + EPS) / ( + B.sum(target ** 2, dim=-1, keepdim=True) + EPS + ) + target_scaled = alpha * target + + noise = target_scaled - preds + + si_sdr_value = (B.sum(target_scaled ** 2, dim=-1) + EPS) / (B.sum(noise ** 2, dim=-1) + EPS) + si_sdr_value = 10 * B.log10(si_sdr_value) + + return si_sdr_value diff --git a/RE/paddlemetric/src/paddlemetrics/functional/audio/si_snr.py b/RE/paddlemetric/src/paddlemetrics/functional/audio/si_snr.py new file mode 100644 index 00000000..abddf039 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/audio/si_snr.py @@ -0,0 +1,46 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.audio.si_sdr import si_sdr + + +def si_snr(preds: Tensor, target: Tensor) -> Tensor: + """Scale-invariant signal-to-noise ratio (SI-SNR). + + Args: + preds: + shape ``[...,time]`` + target: + shape ``[...,time]`` + + Returns: + si-snr value of shape [...] + + Example: + >>> import torchapi as B + >>> from paddlemetrics.functional.audio import si_snr + >>> target = B.tensor([3.0, -0.5, 2.0, 7.0]) + >>> preds = B.tensor([2.5, 0.0, 2.0, 8.0]) + >>> si_snr_val = si_snr(preds, target) + >>> si_snr_val + tensor(15.0918) + + References: + [1] Y. Luo and N. 
Mesgarani, "TaSNet: Time-Domain Audio Separation Network for Real-Time, Single-Channel Speech + Separation," 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2018, pp. + 696-700, doi: 10.1109/ICASSP.2018.8462116. + """ + + return si_sdr(target=target, preds=preds, zero_mean=True) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/audio/snr.py b/RE/paddlemetric/src/paddlemetrics/functional/audio/snr.py new file mode 100644 index 00000000..8c54128b --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/audio/snr.py @@ -0,0 +1,66 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def snr(preds: Tensor, target: Tensor, zero_mean: bool = False) -> Tensor: + r"""Signal-to-noise ratio (SNR_): + + .. math:: + \text{SNR} = \frac{P_{signal}}{P_{noise}} + + where :math:`P` denotes the power of each signal. The SNR metric compares the level + of the desired signal to the level of background noise. Therefore, a high value of + SNR means that the audio is clear. + + Args: + preds: + shape ``[...,time]`` + target: + shape ``[...,time]`` + zero_mean: + if to zero mean target and preds or not + + Returns: + snr value of shape [...] + + Example: + >>> from paddlemetrics.functional.audio import snr + >>> target = B.tensor([3.0, -0.5, 2.0, 7.0]) + >>> preds = B.tensor([2.5, 0.0, 2.0, 8.0]) + >>> snr_val = snr(preds, target) + >>> snr_val + tensor(16.1805) + + References: + [1] Le Roux, Jonathan, et al. "SDR half-baked or well done." IEEE International Conference on Acoustics, Speech + and Signal Processing (ICASSP) 2019. + + """ + _check_same_shape(preds, target) + EPS = B.finfo(preds.dtype).eps + + if zero_mean: + target = target - B.mean(target, dim=-1, keepdim=True) + preds = preds - B.mean(preds, dim=-1, keepdim=True) + + noise = target - preds + + snr_value = (B.sum(target ** 2, dim=-1) + EPS) / (B.sum(noise ** 2, dim=-1) + EPS) + snr_value = 10 * B.log10(snr_value) + + return snr_value diff --git a/RE/paddlemetric/src/paddlemetrics/functional/audio/stoi.py b/RE/paddlemetric/src/paddlemetrics/functional/audio/stoi.py new file mode 100644 index 00000000..4c1f5806 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/audio/stoi.py @@ -0,0 +1,105 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
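# --- Illustrative shape sketch (editor's note, not part of this changeset) ---
# The audio metrics above share one convention: inputs of shape [..., time]
# produce a value of shape [...]. Assumes the backend alias ``B``; the random
# signals are placeholders, not meaningful audio.
import paddleext.torchapi as B
from paddlemetrics.functional.audio import si_sdr, si_snr, snr

preds = B.randn(2, 4, 8000)    # e.g. [batch, speaker, time]
target = B.randn(2, 4, 8000)
assert tuple(snr(preds, target).shape) == (2, 4)
assert tuple(si_sdr(preds, target, zero_mean=True).shape) == (2, 4)
assert tuple(si_snr(preds, target).shape) == (2, 4)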
+import numpy as np +import paddleext.torchapi as B + +from paddlemetrics.utilities.imports import _PYSTOI_AVAILABLE + +if _PYSTOI_AVAILABLE: + from pystoi import stoi as stoi_backend +else: + stoi_backend = None +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def stoi(preds: Tensor, target: Tensor, fs: int, extended: bool = False, keep_same_device: bool = False) -> Tensor: + r"""STOI (Short Term Objective Intelligibility, see [2,3]), a wrapper for the pystoi package [1]. + Note that input will be moved to `cpu` to perform the metric calculation. + + Intelligibility measure which is highly correlated with the intelligibility of degraded speech signals, e.g., due + to additive noise, single/multi-channel noise reduction, binary masking and vocoded speech as in CI simulations. + The STOI-measure is intrusive, i.e., a function of the clean and degraded speech signals. STOI may be a good + alternative to the speech intelligibility index (SII) or the speech transmission index (STI), when you are + interested in the effect of nonlinear processing to noisy speech, e.g., noise reduction, binary masking algorithms, + on speech intelligibility. Description taken from [Cees Taal's website](http://www.ceestaal.nl/code/). + + .. note:: using this metrics requires you to have ``pystoi`` install. Either install as ``pip install + paddlemetrics[audio]`` or ``pip install pystoi`` + + Args: + preds: + shape ``[...,time]`` + target: + shape ``[...,time]`` + fs: + sampling frequency (Hz) + extended: + whether to use the extended STOI described in [4] + keep_same_device: + whether to move the stoi value to the device of preds + + Returns: + stoi value of shape [...] + + Raises: + ValueError: + If ``pystoi`` package is not installed + + Example: + >>> from paddlemetrics.functional.audio import stoi + >>> import torchapi as B + >>> g = B.manual_seed(1) + >>> preds = B.randn(8000) + >>> target = B.randn(8000) + >>> stoi(preds, target, 8000).float() + tensor(-0.0100) + + References: + [1] https://github.com/mpariente/pystoi + + [2] C.H.Taal, R.C.Hendriks, R.Heusdens, J.Jensen 'A Short-Time Objective Intelligibility Measure for + Time-Frequency Weighted Noisy Speech', ICASSP 2010, Texas, Dallas. + + [3] C.H.Taal, R.C.Hendriks, R.Heusdens, J.Jensen 'An Algorithm for Intelligibility Prediction of + Time-Frequency Weighted Noisy Speech', IEEE Transactions on Audio, Speech, and Language Processing, 2011. + + [4] J. Jensen and C. H. Taal, 'An Algorithm for Predicting the Intelligibility of Speech Masked by Modulated + Noise Maskers', IEEE Transactions on Audio, Speech and Language Processing, 2016. + + """ + if not _PYSTOI_AVAILABLE: + raise ValueError( + "STOI metric requires that pystoi is installed." 
+ "Either install as `pip install paddlemetrics[audio]` or `pip install pystoi`" + ) + _check_same_shape(preds, target) + + if len(preds.shape) == 1: + stoi_val_np = stoi_backend(target.detach().cpu().numpy(), preds.detach().cpu().numpy(), fs, extended) + stoi_val = B.tensor(stoi_val_np) + else: + preds_np = preds.reshape(-1, preds.shape[-1]).detach().cpu().numpy() + target_np = target.reshape(-1, preds.shape[-1]).detach().cpu().numpy() + stoi_val_np = np.empty(shape=(preds_np.shape[0])) + for b in range(preds_np.shape[0]): + stoi_val_np[b] = stoi_backend(target_np[b, :], preds_np[b, :], fs, extended) + stoi_val = B.from_numpy(stoi_val_np) + stoi_val = stoi_val.reshape(preds.shape[:-1]) + + if keep_same_device: + stoi_val = stoi_val.to(preds.device) + + return stoi_val diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/__init__.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/__init__.py new file mode 100644 index 00000000..a03982c8 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/__init__.py @@ -0,0 +1,32 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddlemetrics.functional.classification.accuracy import accuracy # noqa: F401 +from paddlemetrics.functional.classification.auc import auc # noqa: F401 +from paddlemetrics.functional.classification.auroc import auroc # noqa: F401 +from paddlemetrics.functional.classification.average_precision import average_precision # noqa: F401 +from paddlemetrics.functional.classification.calibration_error import calibration_error # noqa: F401 +from paddlemetrics.functional.classification.cohen_kappa import cohen_kappa # noqa: F401 +from paddlemetrics.functional.classification.confusion_matrix import confusion_matrix # noqa: F401 +from paddlemetrics.functional.classification.dice import dice_score # noqa: F401 +from paddlemetrics.functional.classification.f_beta import f1, fbeta # noqa: F401 +from paddlemetrics.functional.classification.hamming_distance import hamming_distance # noqa: F401 +from paddlemetrics.functional.classification.hinge import hinge # noqa: F401 +from paddlemetrics.functional.classification.iou import iou # noqa: F401 +from paddlemetrics.functional.classification.kl_divergence import kl_divergence # noqa: F401 +from paddlemetrics.functional.classification.matthews_corrcoef import matthews_corrcoef # noqa: F401 +from paddlemetrics.functional.classification.precision_recall import precision, precision_recall, recall # noqa: F401 +from paddlemetrics.functional.classification.precision_recall_curve import precision_recall_curve # noqa: F401 +from paddlemetrics.functional.classification.roc import roc # noqa: F401 +from paddlemetrics.functional.classification.specificity import specificity # noqa: F401 +from paddlemetrics.functional.classification.stat_scores import stat_scores # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/accuracy.py 
b/RE/paddlemetric/src/paddlemetrics/functional/classification/accuracy.py new file mode 100644 index 00000000..44c89fa9 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/accuracy.py @@ -0,0 +1,418 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional, Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.classification.stat_scores import _reduce_stat_scores, _stat_scores_update +from paddlemetrics.utilities.checks import _check_classification_inputs, _input_format_classification, _input_squeeze +from paddlemetrics.utilities.enums import AverageMethod, DataType, MDMCAverageMethod + + +def _check_subset_validity(mode: DataType) -> bool: + """Checks input mode is valid.""" + return mode in (DataType.MULTILABEL, DataType.MULTIDIM_MULTICLASS) + + +def _mode( + preds: Tensor, + target: Tensor, + threshold: float, + top_k: Optional[int], + num_classes: Optional[int], + multiclass: Optional[bool], +) -> DataType: + """Finds the mode of the input tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + threshold: Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the + case of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + top_k: Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. + num_classes: Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. + + Example: + >>> target = B.tensor([0, 1, 2, 3]) + >>> preds = B.tensor([0, 2, 1, 3]) + >>> _mode(preds, target, 0.5, None, None, None) + + """ + + mode = _check_classification_inputs( + preds, target, threshold=threshold, top_k=top_k, num_classes=num_classes, multiclass=multiclass + ) + return mode + + +def _accuracy_update( + preds: Tensor, + target: Tensor, + reduce: Optional[str], + mdmc_reduce: Optional[str], + threshold: float, + num_classes: Optional[int], + top_k: Optional[int], + multiclass: Optional[bool], + ignore_index: Optional[int], + mode: DataType, +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + """Updates and returns stat scores (true positives, false positives, true negatives, false negatives) required + to compute accuracy. + + Args: + preds: Predicted tensor + target: Ground truth tensor + reduce: Defines the reduction that is applied. + mdmc_reduce: Defines how the multi-dimensional multi-class inputs are handeled. + threshold: Threshold for transforming probability or logit predictions to binary (0,1) predictions, in + the case of binary or multi-label inputs. + num_classes: Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. 
+ top_k: Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. + multiclass: Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. + ignore_index: Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + mode: Mode of the input tensors + """ + + if mode == DataType.MULTILABEL and top_k: + raise ValueError("You can not use the `top_k` parameter to calculate accuracy for multi-label inputs.") + + preds, target = _input_squeeze(preds, target) + tp, fp, tn, fn = _stat_scores_update( + preds, + target, + reduce=reduce, + mdmc_reduce=mdmc_reduce, + threshold=threshold, + num_classes=num_classes, + top_k=top_k, + multiclass=multiclass, + ignore_index=ignore_index, + ) + return tp, fp, tn, fn + + +def _accuracy_compute( + tp: Tensor, + fp: Tensor, + tn: Tensor, + fn: Tensor, + average: Optional[str], + mdmc_average: Optional[str], + mode: DataType, +) -> Tensor: + """Computes accuracy from stat scores: true positives, false positives, true negatives, false negatives. + + Args: + tp: True positives + fp: False positives + tn: True negatives + fn: False negatives + average: Defines the reduction that is applied. + mdmc_average: Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). + mode: Mode of the input tensors + + Example: + >>> preds = B.tensor([0, 2, 1, 3]) + >>> target = B.tensor([0, 1, 2, 3]) + >>> threshold = 0.5 + >>> reduce = average = 'micro' + >>> mdmc_average = 'global' + >>> mode = _mode(preds, target, threshold, top_k=None, num_classes=None, multiclass=None) + >>> tp, fp, tn, fn = _accuracy_update( + ... preds, + ... target, + ... reduce, + ... mdmc_average, + ... threshold=0.5, + ... num_classes=None, + ... top_k=None, + ... multiclass=None, + ... ignore_index=None, + ... mode=mode) + >>> _accuracy_compute(tp, fp, tn, fn, average, mdmc_average, mode) + tensor(0.5000) + + >>> target = B.tensor([0, 1, 2]) + >>> preds = B.tensor([[0.1, 0.9, 0], [0.3, 0.1, 0.6], [0.2, 0.5, 0.3]]) + >>> top_k, threshold = 2, 0.5 + >>> reduce = average = 'micro' + >>> mdmc_average = 'global' + >>> mode = _mode(preds, target, threshold, top_k, num_classes=None, multiclass=None) + >>> tp, fp, tn, fn = _accuracy_update( + ... preds, + ... target, + ... reduce, + ... mdmc_average, + ... threshold, + ... num_classes=None, + ... top_k=top_k, + ... multiclass=None, + ... ignore_index=None, + ... 
mode=mode) + >>> _accuracy_compute(tp, fp, tn, fn, average, mdmc_average, mode) + tensor(0.6667) + """ + + simple_average = [AverageMethod.MICRO, AverageMethod.SAMPLES] + if (mode == DataType.BINARY and average in simple_average) or mode == DataType.MULTILABEL: + numerator = tp + tn + denominator = tp + tn + fp + fn + else: + numerator = tp + denominator = tp + fn + + if average == AverageMethod.MACRO and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + cond = tp + fp + fn == 0 + numerator = numerator[~cond] + denominator = denominator[~cond] + + if average == AverageMethod.NONE and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + # a class is not present if there exists no TPs, no FPs, and no FNs + meaningless_indeces = B.nonzero((tp | fn | fp) == 0).cpu() + numerator[meaningless_indeces, ...] = -1 + denominator[meaningless_indeces, ...] = -1 + + return _reduce_stat_scores( + numerator=numerator, + denominator=denominator, + weights=None if average != AverageMethod.WEIGHTED else tp + fn, + average=average, + mdmc_average=mdmc_average, + ) + + +def _subset_accuracy_update( + preds: Tensor, + target: Tensor, + threshold: float, + top_k: Optional[int], +) -> Tuple[Tensor, Tensor]: + """Updates and returns variables required to compute subset accuracy. + + Args: + preds: Predicted tensor + target: Ground truth tensor + threshold: Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + top_k: Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. + """ + + preds, target = _input_squeeze(preds, target) + preds, target, mode = _input_format_classification(preds, target, threshold=threshold, top_k=top_k) + + if mode == DataType.MULTILABEL and top_k: + raise ValueError("You can not use the `top_k` parameter to calculate accuracy for multi-label inputs.") + + if mode == DataType.MULTILABEL: + correct = (preds == target).all(dim=1).sum() + total = tensor(target.shape[0], device=target.device) + elif mode == DataType.MULTICLASS: + correct = (preds * target).sum() + total = target.sum() + elif mode == DataType.MULTIDIM_MULTICLASS: + sample_correct = (preds * target).sum(dim=(1, 2)) + correct = (sample_correct == target.shape[2]).sum() + total = tensor(target.shape[0], device=target.device) + else: + correct, total = tensor(0), tensor(0) + + return correct, total + + +def _subset_accuracy_compute(correct: Tensor, total: Tensor) -> Tensor: + """Computes subset accuracy from number of correct observations and total number of observations. + + Args: + correct: Number of correct observations + total: Number of observations + """ + + return correct.float() / total + + +def accuracy( + preds: Tensor, + target: Tensor, + average: str = "micro", + mdmc_average: Optional[str] = "global", + threshold: float = 0.5, + top_k: Optional[int] = None, + subset_accuracy: bool = False, + num_classes: Optional[int] = None, + multiclass: Optional[bool] = None, + ignore_index: Optional[int] = None, +) -> Tensor: + r"""Computes `Accuracy`_ + + .. math:: + \text{Accuracy} = \frac{1}{N}\sum_i^N 1(y_i = \hat{y}_i) + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a + tensor of predictions. 
+ + For multi-class and multi-dimensional multi-class data with probability or logits predictions, the + parameter ``top_k`` generalizes this metric to a Top-K accuracy metric: for each sample the + top-K highest probability or logits items are considered to find the correct label. + + For multi-label and multi-dimensional multi-class inputs, this metric computes the "global" + accuracy by default, which counts all labels or sub-samples separately. This can be + changed to subset accuracy (which requires all labels or sub-samples in the sample to + be correctly predicted) by setting ``subset_accuracy=True``. + + Accepts all input types listed in :ref:`references/modules:input types`. + + Args: + preds: Predictions from model (probabilities, logits or labels) + target: Ground truth labels + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + .. note:: If ``'none'`` and a given class doesn't occur in the `preds` or `target`, + the value for the class will be ``nan``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. 
See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + subset_accuracy: + Whether to compute subset accuracy for multi-label and multi-dimensional + multi-class inputs (has no effect for other input types). + + - For multi-label inputs, if the parameter is set to ``True``, then all labels for + each sample must be correctly predicted for the sample to count as correct. If it + is set to ``False``, then all labels are counted separately - this is equivalent to + flattening inputs beforehand (i.e. ``preds = preds.flatten()`` and same for ``target``). + + - For multi-dimensional multi-class inputs, if the parameter is set to ``True``, then all + sub-sample (on the extra axis) must be correct for the sample to be counted as correct. + If it is set to ``False``, then all sub-samples are counter separately - this is equivalent, + in the case of label predictions, to flattening the inputs beforehand (i.e. + ``preds = preds.flatten()`` and same for ``target``). Note that the ``top_k`` parameter + still applies in both cases, if set. + + Raises: + ValueError: + If ``top_k`` parameter is set for ``multi-label`` inputs. + ValueError: + If ``average`` is none of ``"micro"``, ``"macro"``, ``"weighted"``, ``"samples"``, ``"none"``, ``None``. + ValueError: + If ``mdmc_average`` is not one of ``None``, ``"samplewise"``, ``"global"``. + ValueError: + If ``average`` is set but ``num_classes`` is not provided. + ValueError: + If ``num_classes`` is set + and ``ignore_index`` is not in the range ``[0, num_classes)``. + ValueError: + If ``top_k`` is not an ``integer`` larger than ``0``. 
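Before the docstring examples, a hedged NumPy sketch (not library code) of the two multi-label behaviours controlled by ``subset_accuracy``:

```python
import numpy as np

target = np.array([[1, 0, 1],
                   [0, 1, 1]])
preds = np.array([[1, 0, 0],
                  [0, 1, 1]])

# subset_accuracy=False: every label counts separately (5 of 6 correct)
global_acc = (preds == target).mean()
# subset_accuracy=True: a sample counts only if all its labels are correct
subset_acc = (preds == target).all(axis=1).mean()
print(global_acc, subset_acc)  # ~0.833 and 0.5
```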
+ + Example: + >>> import torchapi as B + >>> from paddlemetrics.functional import accuracy + >>> target = B.tensor([0, 1, 2, 3]) + >>> preds = B.tensor([0, 2, 1, 3]) + >>> accuracy(preds, target) + tensor(0.5000) + + >>> target = B.tensor([0, 1, 2]) + >>> preds = B.tensor([[0.1, 0.9, 0], [0.3, 0.1, 0.6], [0.2, 0.5, 0.3]]) + >>> accuracy(preds, target, top_k=2) + tensor(0.6667) + """ + allowed_average = ["micro", "macro", "weighted", "samples", "none", None] + if average not in allowed_average: + raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") + + if average in ["macro", "weighted", "none", None] and (not num_classes or num_classes < 1): + raise ValueError(f"When you set `average` as {average}, you have to provide the number of classes.") + + allowed_mdmc_average = [None, "samplewise", "global"] + if mdmc_average not in allowed_mdmc_average: + raise ValueError(f"The `mdmc_average` has to be one of {allowed_mdmc_average}, got {mdmc_average}.") + + if num_classes and ignore_index is not None and (not 0 <= ignore_index < num_classes or num_classes == 1): + raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {num_classes} classes") + + if top_k is not None and (not isinstance(top_k, int) or top_k <= 0): + raise ValueError(f"The `top_k` should be an integer larger than 0, got {top_k}") + + preds, target = _input_squeeze(preds, target) + mode = _mode(preds, target, threshold, top_k, num_classes, multiclass) + reduce = "macro" if average in ["weighted", "none", None] else average + + if subset_accuracy and _check_subset_validity(mode): + correct, total = _subset_accuracy_update(preds, target, threshold, top_k) + return _subset_accuracy_compute(correct, total) + tp, fp, tn, fn = _accuracy_update( + preds, target, reduce, mdmc_average, threshold, num_classes, top_k, multiclass, ignore_index, mode + ) + return _accuracy_compute(tp, fp, tn, fn, average, mdmc_average, mode) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/auc.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/auc.py new file mode 100644 index 00000000..0e2fddb3 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/auc.py @@ -0,0 +1,133 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + + +def _auc_update(x: Tensor, y: Tensor) -> Tuple[Tensor, Tensor]: + """Updates and returns variables required to compute area under the curve. Checks if the 2 input tenseor have + the same number of elements and if they are 1d. 
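(Sketch, not library code.) The AUC helpers in this module reduce to the trapezoidal rule, with ``_auc_compute_without_check`` delegating to ``B.trapz``; for orientation, the same example values through NumPy, assuming ``x`` is already increasing:

```python
# NumPy equivalent of the trapezoidal-rule AUC evaluated below via B.trapz.
import numpy as np

x = np.array([0.0, 1.0, 2.0, 3.0])
y = np.array([0.0, 1.0, 2.0, 2.0])
print(np.trapz(y, x))  # 4.0, matching the doctests further below
```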
+ + Args: + x: x-coordinates + y: y-coordinates + """ + + if x.ndim > 1: + x = x.squeeze() + + if y.ndim > 1: + y = y.squeeze() + + if x.ndim > 1 or y.ndim > 1: + raise ValueError( + f"Expected both `x` and `y` tensor to be 1d, but got tensors with dimension {x.ndim} and {y.ndim}" + ) + if x.numel() != y.numel(): + raise ValueError( + f"Expected the same number of elements in `x` and `y` tensor but received {x.numel()} and {y.numel()}" + ) + return x, y + + +def _auc_compute_without_check(x: Tensor, y: Tensor, direction: float) -> Tensor: + """Computes area under the curve using the trapezoidal rule. Assumes increasing or decreasing order of `x`. + + Args: + x: x-coordinates, must be either increasing or decreasing + y: y-coordinates + direction: 1 if increaing, -1 if decreasing + + Example: + >>> x = B.tensor([0, 1, 2, 3]) + >>> y = B.tensor([0, 1, 2, 2]) + >>> x, y = _auc_update(x, y) + >>> _auc_compute_without_check(x, y, direction=1.0) + tensor(4.) + """ + + with B.no_grad(): + auc_: Tensor = B.trapz(y, x) * direction + return auc_ + + +def _auc_compute(x: Tensor, y: Tensor, reorder: bool = False) -> Tensor: + """Computes area under the curve using the trapezoidal rule. Checks for increasing or decreasing order of `x`. + + Args: + x: x-coordinates, must be either increasing or decreasing + y: y-coordinates + reorder: if True, will reorder the arrays to make it either increasing or decreasing + + Example: + >>> x = B.tensor([0, 1, 2, 3]) + >>> y = B.tensor([0, 1, 2, 2]) + >>> x, y = _auc_update(x, y) + >>> _auc_compute(x, y) + tensor(4.) + >>> _auc_compute(x, y, reorder=True) + tensor(4.) + """ + + with B.no_grad(): + if reorder: + # TODO: include stable=True arg when pytorch v1.9 is released + x, x_idx = B.sort(x) + y = y[x_idx] + + dx = x[1:] - x[:-1] + if (dx < 0).any(): + if (dx <= 0).all(): + direction = -1.0 + else: + raise ValueError( + "The `x` tensor is neither increasing or decreasing. Try setting the reorder argument to `True`." + ) + else: + direction = 1.0 + return _auc_compute_without_check(x, y, direction) + + +def auc(x: Tensor, y: Tensor, reorder: bool = False) -> Tensor: + """Computes Area Under the Curve (AUC) using the trapezoidal rule. + + Args: + x: x-coordinates, must be either increasing or decreasing + y: y-coordinates + reorder: if True, will reorder the arrays to make it either increasing or decreasing + + Return: + Tensor containing AUC score (float) + + Raises: + ValueError: + If both ``x`` and ``y`` tensors are not ``1d``. + ValueError: + If both ``x`` and ``y`` don't have the same numnber of elements. + ValueError: + If ``x`` tesnsor is neither increasing or decreasing. + + Example: + >>> from paddlemetrics.functional import auc + >>> x = B.tensor([0, 1, 2, 3]) + >>> y = B.tensor([0, 1, 2, 2]) + >>> auc(x, y) + tensor(4.) + >>> auc(x, y, reorder=True) + tensor(4.) + """ + x, y = _auc_update(x, y) + return _auc_compute(x, y, reorder=reorder) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/auroc.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/auroc.py new file mode 100644 index 00000000..a393f20e --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/auroc.py @@ -0,0 +1,257 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import warnings +from typing import Optional, Sequence, Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.classification.auc import _auc_compute_without_check +from paddlemetrics.functional.classification.roc import roc +from paddlemetrics.utilities.checks import _input_format_classification +from paddlemetrics.utilities.enums import AverageMethod, DataType +from paddlemetrics.utilities.imports import _TORCH_LOWER_1_6 + + +def _auroc_update(preds: Tensor, target: Tensor) -> Tuple[Tensor, Tensor, DataType]: + """Updates and returns variables required to compute Area Under the Receiver Operating Characteristic Curve. + Validates the inputs and returns the mode of the inputs. + + Args: + preds: Predicted tensor + target: Ground truth tensor + """ + + # use _input_format_classification for validating the input and get the mode of data + _, _, mode = _input_format_classification(preds, target) + + if mode == "multi class multi dim": + n_classes = preds.shape[1] + preds = preds.transpose(0, 1).reshape(n_classes, -1).transpose(0, 1) + target = target.flatten() + if mode == "multi-label" and preds.ndim > 2: + n_classes = preds.shape[1] + preds = preds.transpose(0, 1).reshape(n_classes, -1).transpose(0, 1) + target = target.transpose(0, 1).reshape(n_classes, -1).transpose(0, 1) + + return preds, target, mode + + +def _auroc_compute( + preds: Tensor, + target: Tensor, + mode: DataType, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + average: Optional[str] = "macro", + max_fpr: Optional[float] = None, + sample_weights: Optional[Sequence] = None, +) -> Tensor: + """Computes Area Under the Receiver Operating Characteristic Curve. + + Args: + preds: predictions from model (logits or probabilities) + target: Ground truth labels + mode: 'multi class multi dim' or 'multi-label' or 'binary' + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. + Should be set to ``None`` for binary problems + average: Defines the reduction that is applied to the output: + max_fpr: If not ``None``, calculates standardized partial AUC over the + range [0, max_fpr]. Should be a float between 0 and 1. + sample_weights: sample weights for each data point + + Example: + >>> # binary case + >>> preds = B.tensor([0.13, 0.26, 0.08, 0.19, 0.34]) + >>> target = B.tensor([0, 0, 1, 1, 1]) + >>> preds, target, mode = _auroc_update(preds, target) + >>> _auroc_compute(preds, target, mode, pos_label=1) + tensor(0.5000) + + >>> # multiclass case + >>> preds = B.tensor([[0.90, 0.05, 0.05], + ... [0.05, 0.90, 0.05], + ... [0.05, 0.05, 0.90], + ... [0.85, 0.05, 0.10], + ... 
[0.10, 0.10, 0.80]]) + >>> target = B.tensor([0, 1, 1, 2, 2]) + >>> preds, target, mode = _auroc_update(preds, target) + >>> _auroc_compute(preds, target, mode, num_classes=3) + tensor(0.7778) + """ + + # binary mode override num_classes + if mode == DataType.BINARY: + num_classes = 1 + + # check max_fpr parameter + if max_fpr is not None: + if not isinstance(max_fpr, float) and 0 < max_fpr <= 1: + raise ValueError(f"`max_fpr` should be a float in range (0, 1], got: {max_fpr}") + + if _TORCH_LOWER_1_6: + raise RuntimeError( + "`max_fpr` argument requires `B.bucketize` which" " is not available below PyTorch version 1.6" + ) + + # max_fpr parameter is only support for binary + if mode != DataType.BINARY: + raise ValueError( + f"Partial AUC computation not available in" + f" multilabel/multiclass setting, 'max_fpr' must be" + f" set to `None`, received `{max_fpr}`." + ) + + # calculate fpr, tpr + if mode == DataType.MULTILABEL: + if average == AverageMethod.MICRO: + fpr, tpr, _ = roc(preds.flatten(), target.flatten(), 1, pos_label, sample_weights) + elif num_classes: + # for multilabel we iteratively evaluate roc in a binary fashion + output = [ + roc(preds[:, i], target[:, i], num_classes=1, pos_label=1, sample_weights=sample_weights) + for i in range(num_classes) + ] + fpr = [o[0] for o in output] + tpr = [o[1] for o in output] + else: + raise ValueError("Detected input to be `multilabel` but you did not provide `num_classes` argument") + else: + if mode != DataType.BINARY: + if num_classes is None: + raise ValueError("Detected input to `multiclass` but you did not provide `num_classes` argument") + if average == AverageMethod.WEIGHTED and len(B.unique(target)) < num_classes: + # If one or more classes has 0 observations, we should exclude them, as its weight will be 0 + target_bool_mat = B.zeros((len(target), num_classes), dtype=bool) + target_bool_mat[B.arange(len(target)), target.long()] = 1 + class_observed = target_bool_mat.sum(axis=0) > 0 + for c in range(num_classes): + if not class_observed[c]: + warnings.warn(f"Class {c} had 0 observations, omitted from AUROC calculation", UserWarning) + preds = preds[:, class_observed] + target = target_bool_mat[:, class_observed] + target = B.where(target)[1] + num_classes = class_observed.sum() + if num_classes == 1: + raise ValueError("Found 1 non-empty class in `multiclass` AUROC calculation") + fpr, tpr, _ = roc(preds, target, num_classes, pos_label, sample_weights) + + # calculate standard roc auc score + if max_fpr is None or max_fpr == 1: + if mode == DataType.MULTILABEL and average == AverageMethod.MICRO: + pass + elif num_classes != 1: + # calculate auc scores per class + auc_scores = [_auc_compute_without_check(x, y, 1.0) for x, y in zip(fpr, tpr)] + + # calculate average + if average == AverageMethod.NONE: + return tensor(auc_scores) + if average == AverageMethod.MACRO: + return B.mean(B.stack(auc_scores)) + if average == AverageMethod.WEIGHTED: + if mode == DataType.MULTILABEL: + support = B.sum(target, dim=0) + else: + support = B.bincount(target.flatten(), minlength=num_classes) + return B.sum(B.stack(auc_scores) * support / support.sum()) + + allowed_average = (AverageMethod.NONE.value, AverageMethod.MACRO.value, AverageMethod.WEIGHTED.value) + raise ValueError( + f"Argument `average` expected to be one of the following:" f" {allowed_average} but got {average}" + ) + + return _auc_compute_without_check(fpr, tpr, 1.0) + + _device = fpr.device if isinstance(fpr, Tensor) else fpr[0].device + max_area: Tensor = tensor(max_fpr, 
device=_device) + # Add a single point at max_fpr and interpolate its tpr value + stop = B.bucketize(max_area, fpr, out_int32=True, right=True) + weight = (max_area - fpr[stop - 1]) / (fpr[stop] - fpr[stop - 1]) + interp_tpr: Tensor = B.lerp(tpr[stop - 1], tpr[stop], weight) + tpr = B.cat([tpr[:stop], interp_tpr.view(1)]) + fpr = B.cat([fpr[:stop], max_area.view(1)]) + + # Compute partial AUC + partial_auc = _auc_compute_without_check(fpr, tpr, 1.0) + + # McClish correction: standardize result to be 0.5 if non-discriminant and 1 if maximal + min_area: Tensor = 0.5 * max_area ** 2 + return 0.5 * (1 + (partial_auc - min_area) / (max_area - min_area)) + + +def auroc( + preds: Tensor, + target: Tensor, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + average: Optional[str] = "macro", + max_fpr: Optional[float] = None, + sample_weights: Optional[Sequence] = None, +) -> Tensor: + """Compute Area Under the Receiver Operating Characteristic Curve (`ROC AUC`_) + + Args: + preds: predictions from model (logits or probabilities) + target: Ground truth labels + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + average: + - ``'micro'`` computes metric globally. Only works for multilabel problems + - ``'macro'`` computes metric for each class and uniformly averages them + - ``'weighted'`` computes metric for each class and does a weighted-average, + where each class is weighted by their support (accounts for class imbalance) + - ``None`` computes and returns the metric per class + max_fpr: + If not ``None``, calculates standardized partial AUC over the + range [0, max_fpr]. Should be a float between 0 and 1. + sample_weights: sample weights for each data point + + Raises: + ValueError: + If ``max_fpr`` is not a ``float`` in the range ``(0, 1]``. + RuntimeError: + If ``PyTorch version`` is ``below 1.6`` since max_fpr requires `B.bucketize` + which is not available below 1.6. + ValueError: + If ``max_fpr`` is not set to ``None`` and the mode is ``not binary`` + since partial AUC computation is not available in multilabel/multiclass. + ValueError: + If ``average`` is none of ``None``, ``"macro"`` or ``"weighted"``. + + Example (binary case): + >>> from paddlemetrics.functional import auroc + >>> preds = B.tensor([0.13, 0.26, 0.08, 0.19, 0.34]) + >>> target = B.tensor([0, 0, 1, 1, 1]) + >>> auroc(preds, target, pos_label=1) + tensor(0.5000) + + Example (multiclass case): + >>> preds = B.tensor([[0.90, 0.05, 0.05], + ... [0.05, 0.90, 0.05], + ... [0.05, 0.05, 0.90], + ... [0.85, 0.05, 0.10], + ... [0.10, 0.10, 0.80]]) + >>> target = B.tensor([0, 1, 1, 2, 2]) + >>> auroc(preds, target, num_classes=3) + tensor(0.7778) + """ + preds, target, mode = _auroc_update(preds, target) + return _auroc_compute(preds, target, mode, num_classes, pos_label, average, max_fpr, sample_weights) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/average_precision.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/average_precision.py new file mode 100644 index 00000000..bc611816 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/average_precision.py @@ -0,0 +1,236 @@ +# Copyright The PyTorch Lightning team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import warnings +from typing import List, Optional, Sequence, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.precision_recall_curve import ( + _precision_recall_curve_compute, + _precision_recall_curve_update, +) + + +def _average_precision_update( + preds: Tensor, + target: Tensor, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + average: Optional[str] = "macro", +) -> Tuple[Tensor, Tensor, int, Optional[int]]: + """Format the predictions and target based on the ``num_classes``, ``pos_label`` and ``average`` parameter + Args: + preds: predictions from model (logits or probabilities) + target: ground truth values + num_classes: integer with number of classes. + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + average: reduction method for multi-class or multi-label problems + """ + preds, target, num_classes, pos_label = _precision_recall_curve_update(preds, target, num_classes, pos_label) + if average == "micro": + if preds.ndim == target.ndim: + # Considering each element of the label indicator matrix as a label + preds = preds.flatten() + target = target.flatten() + num_classes = 1 + else: + raise ValueError("Cannot use `micro` average with multi-class input") + + return preds, target, num_classes, pos_label + + +def _average_precision_compute( + preds: Tensor, + target: Tensor, + num_classes: int, + pos_label: Optional[int] = None, + average: Optional[str] = "macro", + sample_weights: Optional[Sequence] = None, +) -> Union[List[Tensor], Tensor]: + """Computes the average precision score. + + Args: + preds: predictions from model (logits or probabilities) + target: ground truth values + num_classes: integer with number of classes. + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + average: reduction method for multi-class or multi-label problems + sample_weights: sample weights for each data point + + Example: + >>> # binary case + >>> preds = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 1]) + >>> pos_label = 1 + >>> preds, target, num_classes, pos_label = _average_precision_update(preds, target, pos_label=pos_label) + >>> _average_precision_compute(preds, target, num_classes, pos_label) + tensor(1.) + + >>> # multiclass case + >>> preds = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... 
[0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> num_classes = 5 + >>> preds, target, num_classes, pos_label = _average_precision_update(preds, target, num_classes) + >>> _average_precision_compute(preds, target, num_classes, average=None) + [tensor(1.), tensor(1.), tensor(0.2500), tensor(0.2500), tensor(nan)] + """ + + # todo: `sample_weights` is unused + precision, recall, _ = _precision_recall_curve_compute(preds, target, num_classes, pos_label) + if average == "weighted": + if preds.ndim == target.ndim and target.ndim > 1: + weights = target.sum(dim=0).float() + else: + weights = B.bincount(target, minlength=num_classes).float() + weights = weights / B.sum(weights) + else: + weights = None + return _average_precision_compute_with_precision_recall(precision, recall, num_classes, average, weights) + + +def _average_precision_compute_with_precision_recall( + precision: Tensor, + recall: Tensor, + num_classes: int, + average: Optional[str] = "macro", + weights: Optional[Tensor] = None, +) -> Union[List[Tensor], Tensor]: + """Computes the average precision score from precision and recall. + + Args: + precision: precision values + recall: recall values + num_classes: integer with number of classes. Not nessesary to provide + for binary problems. + average: reduction method for multi-class or multi-label problems + weights: weights to use when average='weighted' + + Example: + >>> # binary case + >>> preds = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 1]) + >>> pos_label = 1 + >>> preds, target, num_classes, pos_label = _average_precision_update(preds, target, pos_label=pos_label) + >>> precision, recall, _ = _precision_recall_curve_compute(preds, target, num_classes, pos_label) + >>> _average_precision_compute_with_precision_recall(precision, recall, num_classes, average=None) + tensor(1.) + + >>> # multiclass case + >>> preds = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> num_classes = 5 + >>> preds, target, num_classes, pos_label = _average_precision_update(preds, target, num_classes) + >>> precision, recall, _ = _precision_recall_curve_compute(preds, target, num_classes) + >>> _average_precision_compute_with_precision_recall(precision, recall, num_classes, average=None) + [tensor(1.), tensor(1.), tensor(0.2500), tensor(0.2500), tensor(nan)] + """ + + # Return the step function integral + # The following works because the last entry of precision is + # guaranteed to be 1, as returned by precision_recall_curve + if num_classes == 1: + return -B.sum((recall[1:] - recall[:-1]) * precision[:-1]) + + res = [] + for p, r in zip(precision, recall): + res.append(-B.sum((r[1:] - r[:-1]) * p[:-1])) + + # Reduce + if average in ("macro", "weighted"): + res = B.stack(res) + if B.isnan(res).any(): + warnings.warn( + "Average precision score for one or more classes was `nan`. 
Ignoring these classes in average", + UserWarning, + ) + if average == "macro": + return res[~B.isnan(res)].mean() + weights = B.ones_like(res) if weights is None else weights + return (res * weights)[~B.isnan(res)].sum() + if average is None: + return res + allowed_average = ("micro", "macro", "weighted", None) + raise ValueError(f"Expected argument `average` to be one of {allowed_average}" f" but got {average}") + + +def average_precision( + preds: Tensor, + target: Tensor, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + average: Optional[str] = "macro", + sample_weights: Optional[Sequence] = None, +) -> Union[List[Tensor], Tensor]: + """Computes the average precision score. + + Args: + preds: predictions from model (logits or probabilities) + target: ground truth values + num_classes: integer with number of classes. Not nessesary to provide + for binary problems. + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + average: + defines the reduction that is applied in the case of multiclass and multilabel input. + Should be one of the following: + + - ``'macro'`` [default]: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'micro'``: Calculate the metric globally, across all samples and classes. Cannot be + used with multiclass input. + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support. + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + + sample_weights: sample weights for each data point + + Returns: + tensor with average precision. If multiclass will return list + of such tensors, one for each class + + Example (binary case): + >>> from paddlemetrics.functional import average_precision + >>> pred = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 1]) + >>> average_precision(pred, target, pos_label=1) + tensor(1.) + + Example (multiclass case): + >>> pred = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> average_precision(pred, target, num_classes=5, average=None) + [tensor(1.), tensor(1.), tensor(0.2500), tensor(0.2500), tensor(nan)] + """ + # fixme: `sample_weights` is unused + preds, target, num_classes, pos_label = _average_precision_update(preds, target, num_classes, pos_label, average) + return _average_precision_compute(preds, target, num_classes, pos_label, average, sample_weights) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/calibration_error.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/calibration_error.py new file mode 100644 index 00000000..13203641 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/calibration_error.py @@ -0,0 +1,156 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import FloatTensor, Tensor + +from paddlemetrics.utilities.checks import _input_format_classification +from paddlemetrics.utilities.enums import DataType + + +def _ce_compute( + confidences: FloatTensor, + accuracies: FloatTensor, + bin_boundaries: FloatTensor, + norm: str = "l1", + debias: bool = False, +) -> Tensor: + """Computes the calibration error given the provided bin boundaries and norm. + + Args: + confidences (FloatTensor): The confidence (i.e. predicted prob) of the top1 prediction. + accuracies (FloatTensor): 1.0 if the top-1 prediction was correct, 0.0 otherwise. + bin_boundaries (FloatTensor): Bin boundaries separating the linspace from 0 to 1. + norm (str, optional): Norm function to use when computing calibration error. Defaults to "l1". + debias (bool, optional): Apply debiasing to L2 norm computation as in + `Verified Uncertainty Calibration`_. Defaults to False. + + Raises: + ValueError: If an unsupported norm function is provided. + + Returns: + Tensor: Calibration error scalar. + """ + if norm not in {"l1", "l2", "max"}: + raise ValueError(f"Norm {norm} is not supported. Please select from l1, l2, or max. ") + + conf_bin = B.zeros_like(bin_boundaries) + acc_bin = B.zeros_like(bin_boundaries) + prop_bin = B.zeros_like(bin_boundaries) + for i, (bin_lower, bin_upper) in enumerate(zip(bin_boundaries[:-1], bin_boundaries[1:])): + # Calculated confidence and accuracy in each bin + in_bin = confidences.gt(bin_lower.item()) * confidences.le(bin_upper.item()) + prop_in_bin = in_bin.float().mean() + if prop_in_bin.item() > 0: + acc_bin[i] = accuracies[in_bin].float().mean() + conf_bin[i] = confidences[in_bin].mean() + prop_bin[i] = prop_in_bin + + if norm == "l1": + ce = B.sum(B.abs(acc_bin - conf_bin) * prop_bin) + elif norm == "max": + ce = B.max(B.abs(acc_bin - conf_bin)) + elif norm == "l2": + ce = B.sum(B.pow(acc_bin - conf_bin, 2) * prop_bin) + # NOTE: debiasing is disabled in the wrapper functions. This implementation differs from that in sklearn. + if debias: + # the order here (acc_bin - 1 ) vs (1 - acc_bin) is flipped from + # the equation in Verified Uncertainty Prediction (Kumar et al 2019)/ + debias_bins = (acc_bin * (acc_bin - 1) * prop_bin) / (prop_bin * accuracies.size()[0] - 1) + ce += B.sum(B.nan_to_num(debias_bins)) # replace nans with zeros if nothing appeared in a bin + ce = B.sqrt(ce) if ce > 0 else B.tensor(0) + return ce + + +def _ce_update(preds: Tensor, target: Tensor) -> Tuple[FloatTensor, FloatTensor]: + """Given a predictions and targets tensor, computes the confidences of the top-1 prediction and records their + correctness. + + Args: + preds (Tensor): Input softmaxed predictions. + target (Tensor): Labels. + + Raises: + ValueError: If the dataset shape is not binary, multiclass, or multidimensional-multiclass. 
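For the plain multi-class branch described here, a hedged NumPy sketch of the confidence/correctness extraction (illustration only):

```python
# NumPy sketch of the multi-class branch of _ce_update: keep the top-1
# probability per sample and record whether the argmax matches the target.
import numpy as np

preds = np.array([[0.7, 0.2, 0.1],
                  [0.3, 0.5, 0.2]])
target = np.array([0, 2])

confidences = preds.max(axis=1)                              # [0.7, 0.5]
accuracies = (preds.argmax(axis=1) == target).astype(float)  # [1.0, 0.0]
```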
+ + Returns: + Tuple[FloatTensor, FloatTensor]: [description] + """ + _, _, mode = _input_format_classification(preds, target) + + if mode == DataType.BINARY: + confidences, accuracies = preds, target + elif mode == DataType.MULTICLASS: + confidences, predictions = preds.max(dim=1) + accuracies = predictions.eq(target) + elif mode == DataType.MULTIDIM_MULTICLASS: + # reshape tensors + # for preds, move the class dimension to the final axis and flatten the rest + confidences, predictions = B.transpose(preds, 1, -1).flatten(0, -2).max(dim=1) + # for targets, just flatten the target + accuracies = predictions.eq(target.flatten()) + else: + raise ValueError( + f"Calibration error is not well-defined for data with size {preds.size()} and targets {target.size()}." + ) + # must be cast to float for ddp allgather to work + return confidences.float(), accuracies.float() + + +def calibration_error(preds: Tensor, target: Tensor, n_bins: int = 15, norm: str = "l1") -> Tensor: + r""" + `Computes the Top-label Calibration Error`_ + + Three different norms are implemented, each corresponding to variations on the calibration error metric. + + L1 norm (Expected Calibration Error) + + .. math:: + \text{ECE} = \frac{1}{N}\sum_i^N \|(p_i - c_i)\| + + Infinity norm (Maximum Calibration Error) + + .. math:: + \text{RMSCE} = \max_{i} (p_i - c_i) + + L2 norm (Root Mean Square Calibration Error) + + .. math:: + \text{MCE} = \frac{1}{N}\sum_i^N (p_i - c_i)^2 + + Where :math:`p_i` is the top-1 prediction accuracy in + bin i and :math:`c_i` is the average confidence of predictions in bin i. + + .. note: + L2-norm debiasing is not yet supported. + + Args: + preds (Tensor): Model output probabilities. + target (Tensor): Ground-truth target class labels. + n_bins (int, optional): Number of bins to use when computing t. Defaults to 15. + norm (str, optional): Norm used to compare empirical and expected probability bins. + Defaults to "l1", or Expected Calibration Error. + """ + if norm not in ("l1", "l2", "max"): + raise ValueError(f"Norm {norm} is not supported. Please select from l1, l2, or max. ") + + if not isinstance(n_bins, int) or n_bins <= 0: + raise ValueError(f"Expected argument `n_bins` to be a int larger than 0 but got {n_bins}") + + confidences, accuracies = _ce_update(preds, target) + + bin_boundaries = B.linspace(0, 1, n_bins + 1, dtype=B.float, device=preds.device) + + return _ce_compute(confidences, accuracies, bin_boundaries, norm=norm) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/cohen_kappa.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/cohen_kappa.py new file mode 100644 index 00000000..2face7a5 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/cohen_kappa.py @@ -0,0 +1,112 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
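Before the Cohen's kappa implementation below, a hedged NumPy sketch of the L1 (Expected Calibration Error) branch that ``_ce_compute`` above evaluates over equal-width bins; illustration only, not part of paddlemetrics:

```python
# NumPy sketch of binned ECE (the norm="l1" branch of _ce_compute above).
import numpy as np

def expected_calibration_error(confidences, accuracies, n_bins=15):
    bins = np.linspace(0.0, 1.0, n_bins + 1)
    ece = 0.0
    for lo, hi in zip(bins[:-1], bins[1:]):
        in_bin = (confidences > lo) & (confidences <= hi)
        prop = in_bin.mean()  # fraction of samples falling into this bin
        if prop > 0:
            gap = abs(accuracies[in_bin].mean() - confidences[in_bin].mean())
            ece += gap * prop
    return ece

conf = np.array([0.95, 0.85, 0.75, 0.65])
acc = np.array([1.0, 1.0, 0.0, 1.0])
print(expected_calibration_error(conf, acc, n_bins=10))  # ~0.325
```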
+from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.confusion_matrix import _confusion_matrix_compute, _confusion_matrix_update + +_cohen_kappa_update = _confusion_matrix_update + + +def _cohen_kappa_compute(confmat: Tensor, weights: Optional[str] = None) -> Tensor: + """Computes Cohen's kappa based on the weighting type. + + Args: + confmat: Confusion matrix without normalization + weights: Weighting type to calculate the score. Choose from + - ``None`` or ``'none'``: no weighting + - ``'linear'``: linear weighting + - ``'quadratic'``: quadratic weighting + + Example: + >>> target = B.tensor([1, 1, 0, 0]) + >>> preds = B.tensor([0, 1, 0, 0]) + >>> confmat = _cohen_kappa_update(preds, target, num_classes=2) + >>> _cohen_kappa_compute(confmat) + tensor(0.5000) + """ + + confmat = _confusion_matrix_compute(confmat) + confmat = confmat.float() if not confmat.is_floating_point() else confmat + n_classes = confmat.shape[0] + sum0 = confmat.sum(dim=0, keepdim=True) + sum1 = confmat.sum(dim=1, keepdim=True) + expected = sum1 @ sum0 / sum0.sum() # outer product + + if weights is None: + w_mat = B.ones_like(confmat).flatten() + w_mat[:: n_classes + 1] = 0 + w_mat = w_mat.reshape(n_classes, n_classes) + elif weights in ("linear", "quadratic"): + w_mat = B.zeros_like(confmat) + w_mat += B.arange(n_classes, dtype=w_mat.dtype, device=w_mat.device) + if weights == "linear": + w_mat = B.abs(w_mat - w_mat.T) + else: + w_mat = B.pow(w_mat - w_mat.T, 2.0) + else: + raise ValueError( + f"Received {weights} for argument ``weights`` but should be either" " None, 'linear' or 'quadratic'" + ) + + k = B.sum(w_mat * confmat) / B.sum(w_mat * expected) + return 1 - k + + +def cohen_kappa( + preds: Tensor, + target: Tensor, + num_classes: int, + weights: Optional[str] = None, + threshold: float = 0.5, +) -> Tensor: + r""" + Calculates `Cohen's kappa score`_ that measures inter-annotator agreement. + It is defined as + + .. math:: + \kappa = (p_o - p_e) / (1 - p_e) + + where :math:`p_o` is the empirical probability of agreement and :math:`p_e` isg + the expected agreement when both annotators assign labels randomly. Note that + :math:`p_e` is estimated using a per-annotator empirical prior over the + class labels. + + Args: + preds: (float or long tensor), Either a ``(N, ...)`` tensor with labels or + ``(N, C, ...)`` where C is the number of classes, tensor with labels/probabilities + + target: ``target`` (long tensor), tensor with shape ``(N, ...)`` with ground true labels + + num_classes: Number of classes in the dataset. + + weights: Weighting type to calculate the score. Choose from + - ``None`` or ``'none'``: no weighting + - ``'linear'``: linear weighting + - ``'quadratic'``: quadratic weighting + + threshold: + Threshold value for binary or multi-label probabilities. 
default: 0.5 + + Example: + >>> from paddlemetrics.functional import cohen_kappa + >>> target = B.tensor([1, 1, 0, 0]) + >>> preds = B.tensor([0, 1, 0, 0]) + >>> cohen_kappa(preds, target, num_classes=2) + tensor(0.5000) + """ + confmat = _cohen_kappa_update(preds, target, num_classes, threshold) + return _cohen_kappa_compute(confmat, weights) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/confusion_matrix.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/confusion_matrix.py new file mode 100644 index 00000000..b4f3c12d --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/confusion_matrix.py @@ -0,0 +1,184 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities import rank_zero_warn +from paddlemetrics.utilities.checks import _input_format_classification +from paddlemetrics.utilities.enums import DataType + + +def _confusion_matrix_update( + preds: Tensor, target: Tensor, num_classes: int, threshold: float = 0.5, multilabel: bool = False +) -> Tensor: + """Updates and returns confusion matrix (without any normalization) based on the mode of the input. + + Args: + preds: Predicted tensor + target: Ground truth tensor + threshold: Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the + case of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + multilabel: determines if data is multilabel or not. + """ + + preds, target, mode = _input_format_classification(preds, target, threshold) + if mode not in (DataType.BINARY, DataType.MULTILABEL): + preds = preds.argmax(dim=1) + target = target.argmax(dim=1) + if multilabel: + unique_mapping = ((2 * target + preds) + 4 * B.arange(num_classes, device=preds.device)).flatten() + minlength = 4 * num_classes + else: + unique_mapping = (target.view(-1) * num_classes + preds.view(-1)).to(B.long) + minlength = num_classes ** 2 + + bins = B.bincount(unique_mapping, minlength=minlength) + if multilabel: + confmat = bins.reshape(num_classes, 2, 2) + else: + confmat = bins.reshape(num_classes, num_classes) + return confmat + + +def _confusion_matrix_compute(confmat: Tensor, normalize: Optional[str] = None) -> Tensor: + """Computes confusion matrix based on the normalization mode. + + Args: + confmat: Confusion matrix without normalization + normalize: Normalization mode for confusion matrix. 
Choose from + - ``None`` or ``'none'``: no normalization (default) + - ``'true'``: normalization over the targets (most commonly used) + - ``'pred'``: normalization over the predictions + - ``'all'``: normalization over the whole matrix + + Example: + >>> # binary case + >>> target = B.tensor([1, 1, 0, 0]) + >>> preds = B.tensor([0, 1, 0, 0]) + >>> confmat = _confusion_matrix_update(preds, target, num_classes=2) + >>> _confusion_matrix_compute(confmat) + tensor([[2, 0], + [1, 1]]) + + >>> # multiclass case + >>> target = B.tensor([2, 1, 0, 0]) + >>> preds = B.tensor([2, 1, 0, 1]) + >>> confmat = _confusion_matrix_update(preds, target, num_classes=3) + >>> _confusion_matrix_compute(confmat) + tensor([[1, 1, 0], + [0, 1, 0], + [0, 0, 1]]) + + >>> # multilabel case + >>> target = B.tensor([[0, 1, 0], [1, 0, 1]]) + >>> preds = B.tensor([[0, 0, 1], [1, 0, 1]]) + >>> confmat = _confusion_matrix_update(preds, target, num_classes=3, multilabel=True) + >>> _confusion_matrix_compute(confmat) # doctest: +NORMALIZE_WHITESPACE + tensor([[[1, 0], [0, 1]], + [[1, 0], [1, 0]], + [[0, 1], [0, 1]]]) + """ + + allowed_normalize = ("true", "pred", "all", "none", None) + if normalize not in allowed_normalize: + raise ValueError(f"Argument average needs to one of the following: {allowed_normalize}") + if normalize is not None and normalize != "none": + confmat = confmat.float() if not confmat.is_floating_point() else confmat + if normalize == "true": + confmat = confmat / confmat.sum(axis=1, keepdim=True) + elif normalize == "pred": + confmat = confmat / confmat.sum(axis=0, keepdim=True) + elif normalize == "all": + confmat = confmat / confmat.sum() + + nan_elements = confmat[B.isnan(confmat)].nelement() + if nan_elements != 0: + confmat[B.isnan(confmat)] = 0 + rank_zero_warn(f"{nan_elements} nan values found in confusion matrix have been replaced with zeros.") + return confmat + + +def confusion_matrix( + preds: Tensor, + target: Tensor, + num_classes: int, + normalize: Optional[str] = None, + threshold: float = 0.5, + multilabel: bool = False, +) -> Tensor: + r""" + Computes the `confusion matrix`_. Works with binary, + multiclass, and multilabel data. Accepts probabilities or logits from a model output or integer class + values in prediction. Works with multi-dimensional preds and target, but it should be noted that + additional dimensions will be flattened. + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument + to convert into integer labels. This is the case for binary and multi-label probabilities or logits. + + If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. + + If working with multilabel data, setting the `is_multilabel` argument to `True` will make sure that a + `confusion matrix gets calculated per label`_. + + Args: + preds: (float or long tensor), Either a ``(N, ...)`` tensor with labels or + ``(N, C, ...)`` where C is the number of classes, tensor with labels/logits/probabilities + target: ``target`` (long tensor), tensor with shape ``(N, ...)`` with ground true labels + num_classes: Number of classes in the dataset. + normalize: Normalization mode for confusion matrix. 
Choose from + + - ``None`` or ``'none'``: no normalization (default) + - ``'true'``: normalization over the targets (most commonly used) + - ``'pred'``: normalization over the predictions + - ``'all'``: normalization over the whole matrix + + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + + multilabel: + determines if data is multilabel or not. + + Example (binary data): + >>> from paddlemetrics import ConfusionMatrix + >>> target = B.tensor([1, 1, 0, 0]) + >>> preds = B.tensor([0, 1, 0, 0]) + >>> confmat = ConfusionMatrix(num_classes=2) + >>> confmat(preds, target) + tensor([[2., 0.], + [1., 1.]]) + + Example (multiclass data): + >>> target = B.tensor([2, 1, 0, 0]) + >>> preds = B.tensor([2, 1, 0, 1]) + >>> confmat = ConfusionMatrix(num_classes=3) + >>> confmat(preds, target) + tensor([[1., 1., 0.], + [0., 1., 0.], + [0., 0., 1.]]) + + Example (multilabel data): + >>> target = B.tensor([[0, 1, 0], [1, 0, 1]]) + >>> preds = B.tensor([[0, 0, 1], [1, 0, 1]]) + >>> confmat = ConfusionMatrix(num_classes=3, multilabel=True) + >>> confmat(preds, target) # doctest: +NORMALIZE_WHITESPACE + tensor([[[1., 0.], [0., 1.]], + [[1., 0.], [1., 0.]], + [[0., 1.], [0., 1.]]]) + + """ + confmat = _confusion_matrix_update(preds, target, num_classes, threshold, multilabel) + return _confusion_matrix_compute(confmat, normalize) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/dice.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/dice.py new file mode 100644 index 00000000..5f90fe02 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/dice.py @@ -0,0 +1,112 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.data import to_categorical +from paddlemetrics.utilities.distributed import reduce + + +def _stat_scores( + preds: Tensor, + target: Tensor, + class_index: int, + argmax_dim: int = 1, +) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: + """Calculates the number of true positive, false positive, true negative and false negative for a specific + class. 
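Stepping back to the confusion-matrix module above for a moment: a hedged NumPy sketch of the bincount encoding that ``_confusion_matrix_update`` uses for the multiclass case (illustration only):

```python
# NumPy sketch of the bincount trick behind _confusion_matrix_update above.
import numpy as np

num_classes = 3
target = np.array([2, 1, 0, 0])
preds = np.array([2, 1, 0, 1])

# each (target, pred) pair maps to a unique bin in [0, num_classes**2)
unique_mapping = target * num_classes + preds
confmat = np.bincount(unique_mapping, minlength=num_classes ** 2)
confmat = confmat.reshape(num_classes, num_classes)
print(confmat)  # [[1 1 0], [0 1 0], [0 0 1]], matching the docstring above
```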
+ + Args: + preds: prediction tensor + target: target tensor + class_index: class to calculate over + argmax_dim: if pred is a tensor of probabilities, this indicates the + axis the argmax transformation will be applied over + + Return: + True Positive, False Positive, True Negative, False Negative, Support + + Example: + >>> x = B.tensor([1, 2, 3]) + >>> y = B.tensor([0, 2, 3]) + >>> tp, fp, tn, fn, sup = _stat_scores(x, y, class_index=1) + >>> tp, fp, tn, fn, sup + (tensor(0), tensor(1), tensor(2), tensor(0), tensor(0)) + """ + if preds.ndim == target.ndim + 1: + preds = to_categorical(preds, argmax_dim=argmax_dim) + + tp = ((preds == class_index) * (target == class_index)).to(B.long).sum() + fp = ((preds == class_index) * (target != class_index)).to(B.long).sum() + tn = ((preds != class_index) * (target != class_index)).to(B.long).sum() + fn = ((preds != class_index) * (target == class_index)).to(B.long).sum() + sup = (target == class_index).to(B.long).sum() + + return tp, fp, tn, fn, sup + + +def dice_score( + preds: Tensor, + target: Tensor, + bg: bool = False, + nan_score: float = 0.0, + no_fg_score: float = 0.0, + reduction: str = "elementwise_mean", +) -> Tensor: + """Compute dice score from prediction scores. + + Args: + preds: estimated probabilities + target: ground-truth labels + bg: whether to also compute dice for the background + nan_score: score to return, if a NaN occurs during computation + no_fg_score: score to return, if no foreground pixel was found in target + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + + Return: + Tensor containing dice score + + Example: + >>> from paddlemetrics.functional import dice_score + >>> pred = B.tensor([[0.85, 0.05, 0.05, 0.05], + ... [0.05, 0.85, 0.05, 0.05], + ... [0.05, 0.05, 0.85, 0.05], + ... [0.05, 0.05, 0.05, 0.85]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> dice_score(pred, target) + tensor(0.3333) + """ + num_classes = preds.shape[1] + bg_inv = 1 - int(bg) + scores = B.zeros(num_classes - bg_inv, device=preds.device, dtype=B.float32) + for i in range(bg_inv, num_classes): + if not (target == i).any(): + # no foreground class + scores[i - bg_inv] += no_fg_score + continue + + # TODO: rewrite to use general `stat_scores` + tp, fp, _, fn, _ = _stat_scores(preds=preds, target=target, class_index=i) + denom = (2 * tp + fp + fn).to(B.float) + # nan result + score_cls = (2 * tp).to(B.float) / denom if B.is_nonzero(denom) else nan_score + scores[i - bg_inv] += score_cls.item() + + return reduce(scores, reduction=reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/f_beta.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/f_beta.py new file mode 100644 index 00000000..7b9b626c --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/f_beta.py @@ -0,0 +1,351 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.stat_scores import _reduce_stat_scores, _stat_scores_update +from paddlemetrics.utilities.enums import AverageMethod as AvgMethod +from paddlemetrics.utilities.enums import MDMCAverageMethod + + +def _safe_divide(num: Tensor, denom: Tensor) -> Tensor: + """prevent zero division.""" + denom[denom == 0.0] = 1 + return num / denom + + +def _fbeta_compute( + tp: Tensor, + fp: Tensor, + tn: Tensor, + fn: Tensor, + beta: float, + ignore_index: Optional[int], + average: str, + mdmc_average: Optional[str], +) -> Tensor: + """Computes f_beta metric from stat scores: true positives, false positives, true negatives, false negatives. + + Args: + tp: True positives + fp: False positives + tn: True negatives + fn: False negatives + beta: The parameter `beta` (which determines the weight of recall in the combined score) + ignore_index: Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method + average: Defines the reduction that is applied + mdmc_average: Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter) + + Example: + >>> from paddlemetrics.functional.classification.stat_scores import _stat_scores_update + >>> target = B.tensor([0, 1, 2, 0, 1, 2]) + >>> preds = B.tensor([0, 2, 1, 0, 0, 1]) + >>> tp, fp, tn, fn = _stat_scores_update( + ... preds, + ... target, + ... reduce='micro', + ... num_classes=3, + ... ) + >>> _fbeta_compute(tp, fp, tn, fn, beta=0.5, ignore_index=None, average='micro', mdmc_average=None) + tensor(0.3333) + """ + if average == AvgMethod.MICRO and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + mask = tp >= 0 + precision = _safe_divide(tp[mask].sum().float(), (tp[mask] + fp[mask]).sum()) + recall = _safe_divide(tp[mask].sum().float(), (tp[mask] + fn[mask]).sum()) + else: + precision = _safe_divide(tp.float(), tp + fp) + recall = _safe_divide(tp.float(), tp + fn) + + num = (1 + beta ** 2) * precision * recall + denom = beta ** 2 * precision + recall + denom[denom == 0.0] = 1.0 # avoid division by 0 + + # if classes matter and a given class is not present in both the preds and the target, + # computing the score for this class is meaningless, thus they should be ignored + if average == AvgMethod.NONE and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + # a class is not present if there exists no TPs, no FPs, and no FNs + meaningless_indeces = B.nonzero((tp | fn | fp) == 0).cpu() + if ignore_index is None: + ignore_index = meaningless_indeces + else: + ignore_index = B.unique(B.cat((meaningless_indeces, B.tensor([[ignore_index]])))) + + if ignore_index is not None: + if average not in (AvgMethod.MICRO, AvgMethod.SAMPLES) and mdmc_average == MDMCAverageMethod.SAMPLEWISE: + num[..., ignore_index] = -1 + denom[..., ignore_index] = -1 + elif average not in (AvgMethod.MICRO, AvgMethod.SAMPLES): + num[ignore_index, ...] = -1 + denom[ignore_index, ...] 
= -1 + + if average == AvgMethod.MACRO and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + cond = (tp + fp + fn == 0) | (tp + fp + fn == -3) + num = num[~cond] + denom = denom[~cond] + + return _reduce_stat_scores( + numerator=num, + denominator=denom, + weights=None if average != AvgMethod.WEIGHTED else tp + fn, + average=average, + mdmc_average=mdmc_average, + ) + + +def fbeta( + preds: Tensor, + target: Tensor, + beta: float = 1.0, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + num_classes: Optional[int] = None, + threshold: float = 0.5, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, +) -> Tensor: + r""" + Computes f_beta metric. + + .. math:: + F_{\beta} = (1 + \beta^2) * \frac{\text{precision} * \text{recall}} + {(\beta^2 * \text{precision}) + \text{recall}} + + Works with binary, multiclass, and multilabel data. + Accepts probabilities or logits from a model output or integer class values in prediction. + Works with multi-dimensional preds and target. + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument + to convert into integer labels. This is the case for binary and multi-label logits or probabilities. + + If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. + + The reduction method (how the precision scores are aggregated) is controlled by the + ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the + multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + preds: Predictions from model (probabilities, logits or labels) + target: Ground truth values + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + .. note:: If ``'none'`` and a given class doesn't occur in the `preds` or `target`, + the value for the class will be ``nan``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. 
+ - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + Return: + The shape of the returned tensor depends on the ``average`` parameter + + - If ``average in ['micro', 'macro', 'weighted', 'samples']``, a one-element tensor will be returned + - If ``average in ['none', None]``, the shape will be ``(C,)``, where ``C`` stands for the number + of classes + + Example: + >>> from paddlemetrics.functional import fbeta + >>> target = B.tensor([0, 1, 2, 0, 1, 2]) + >>> preds = B.tensor([0, 2, 1, 0, 0, 1]) + >>> fbeta(preds, target, num_classes=3, beta=0.5) + tensor(0.3333) + + """ + allowed_average = list(AvgMethod) + if average not in allowed_average: + raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") + + if mdmc_average is not None and MDMCAverageMethod.from_str(mdmc_average) is None: + raise ValueError(f"The `mdmc_average` has to be one of {list(MDMCAverageMethod)}, got {mdmc_average}.") + + if average in [AvgMethod.MACRO, AvgMethod.WEIGHTED, AvgMethod.NONE] and (not num_classes or num_classes < 1): + raise ValueError(f"When you set `average` as {average}, you have to provide the number of classes.") + + if num_classes and ignore_index is not None and (not 0 <= ignore_index < num_classes or num_classes == 1): + raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {num_classes} classes") + + reduce = AvgMethod.MACRO if average in [AvgMethod.WEIGHTED, AvgMethod.NONE] else average + tp, fp, tn, fn = _stat_scores_update( + preds, + target, + reduce=reduce, + mdmc_reduce=mdmc_average, + threshold=threshold, + num_classes=num_classes, + top_k=top_k, + multiclass=multiclass, + ignore_index=ignore_index, + ) + + return _fbeta_compute(tp, fp, tn, fn, beta, ignore_index, average, mdmc_average) + + +def f1( + preds: Tensor, + target: Tensor, + beta: float = 1.0, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + num_classes: Optional[int] = None, + threshold: float = 0.5, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, +) -> Tensor: + """Computes F1 metric. 
F1 metrics correspond to a equally weighted average of the precision and recall scores. + + Works with binary, multiclass, and multilabel data. + Accepts probabilities or logits from a model output or integer class values in prediction. + Works with multi-dimensional preds and target. + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument + to convert into integer labels. This is the case for binary and multi-label probabilities or logits. + + If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. + + The reduction method (how the precision scores are aggregated) is controlled by the + ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the + multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + preds: Predictions from model (probabilities, logits or labels) + target: Ground truth values + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + .. note:: If ``'none'`` and a given class doesn't occur in the `preds` or `target`, + the value for the class will be ``nan``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. 
+ top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + Return: + The shape of the returned tensor depends on the ``average`` parameter + + - If ``average in ['micro', 'macro', 'weighted', 'samples']``, a one-element tensor will be returned + - If ``average in ['none', None]``, the shape will be ``(C,)``, where ``C`` stands for the number + of classes + + Example: + >>> from paddlemetrics.functional import f1 + >>> target = B.tensor([0, 1, 2, 0, 1, 2]) + >>> preds = B.tensor([0, 2, 1, 0, 0, 1]) + >>> f1(preds, target, num_classes=3) + tensor(0.3333) + """ + return fbeta(preds, target, 1.0, average, mdmc_average, ignore_index, num_classes, threshold, top_k, multiclass) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/hamming_distance.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/hamming_distance.py new file mode 100644 index 00000000..e3f95bad --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/hamming_distance.py @@ -0,0 +1,97 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _input_format_classification + + +def _hamming_distance_update( + preds: Tensor, + target: Tensor, + threshold: float = 0.5, +) -> Tuple[Tensor, int]: + """Returns the number of positions where prediction equals target, and number of predictions. + + Args: + preds: Predicted tensor + target: Ground truth tensor + threshold: Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + """ + + preds, target, _ = _input_format_classification(preds, target, threshold=threshold) + + correct = (preds == target).sum() + total = preds.numel() + + return correct, total + + +def _hamming_distance_compute(correct: Tensor, total: Union[int, Tensor]) -> Tensor: + """Computes the Hamming distance. 
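+
+    The returned value is ``1 - correct / total``, i.e. the fraction of positions at which the
+    prediction and the target disagree (this note summarises the return statement below).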
+ + Args: + correct: Number of positions where prediction equals target + total: Total number of predictions + + Example: + >>> target = B.tensor([[0, 1], [1, 1]]) + >>> preds = B.tensor([[0, 1], [0, 1]]) + >>> correct, total = _hamming_distance_update(preds, target) + >>> _hamming_distance_compute(correct, total) + tensor(0.2500) + """ + + return 1 - correct.float() / total + + +def hamming_distance(preds: Tensor, target: Tensor, threshold: float = 0.5) -> Tensor: + r""" + Computes the average `Hamming distance`_ (also + known as Hamming loss) between targets and predictions: + + .. math:: + \text{Hamming distance} = \frac{1}{N \cdot L} \sum_i^N \sum_l^L 1(y_{il} \neq \hat{y}_{il}) + + Where :math:`y` is a tensor of target values, :math:`\hat{y}` is a tensor of predictions, + and :math:`\bullet_{il}` refers to the :math:`l`-th label of the :math:`i`-th sample of that + tensor. + + This is the same as ``1-accuracy`` for binary data, while for all other types of inputs it + treats each possible label separately - meaning that, for example, multi-class data is + treated as if it were multi-label. + + Accepts all input types listed in :ref:`references/modules:input types`. + + Args: + preds: Predictions from model (probabilities, logits or labels) + target: Ground truth + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + + Example: + >>> from paddlemetrics.functional import hamming_distance + >>> target = B.tensor([[0, 1], [1, 1]]) + >>> preds = B.tensor([[0, 1], [0, 1]]) + >>> hamming_distance(preds, target) + tensor(0.2500) + + """ + + correct, total = _hamming_distance_update(preds, target, threshold) + return _hamming_distance_compute(correct, total) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/hinge.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/hinge.py new file mode 100644 index 00000000..59d8be1a --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/hinge.py @@ -0,0 +1,231 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.checks import _input_squeeze +from paddlemetrics.utilities.data import to_onehot +from paddlemetrics.utilities.enums import DataType, EnumStr + + +class MulticlassMode(EnumStr): + """Enum to represent possible multiclass modes of hinge. + + >>> "Crammer-Singer" in list(MulticlassMode) + True + """ + + CRAMMER_SINGER = "crammer-singer" + ONE_VS_ALL = "one-vs-all" + + +def _check_shape_and_type_consistency_hinge( + preds: Tensor, + target: Tensor, +) -> DataType: + """Checks shape and type of `preds` and `target` and returns mode of the input tensors. 
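+
+    One dimensional ``preds`` are interpreted as binary scores, while two dimensional ``preds`` of
+    shape ``(N, C)`` are interpreted as multiclass scores (see the branching below).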
+ + Args: + preds: Predicted tensor + target: Ground truth tensor + + Raises: + `ValueError`: if `target` is not one dimensional + `ValueError`: if `preds` and `target` do not have the same shape in the first dimension + `ValueError`: if `pred` is neither one nor two dimensional + """ + + if target.ndim > 1: + raise ValueError( + f"The `target` should be one dimensional, got `target` with shape={target.shape}.", + ) + + if preds.ndim == 1: + if preds.shape != target.shape: + raise ValueError( + "The `preds` and `target` should have the same shape,", + f" got `preds` with shape={preds.shape} and `target` with shape={target.shape}.", + ) + mode = DataType.BINARY + elif preds.ndim == 2: + if preds.shape[0] != target.shape[0]: + raise ValueError( + "The `preds` and `target` should have the same shape in the first dimension,", + f" got `preds` with shape={preds.shape} and `target` with shape={target.shape}.", + ) + mode = DataType.MULTICLASS + else: + raise ValueError(f"The `preds` should be one or two dimensional, got `preds` with shape={preds.shape}.") + return mode + + +def _hinge_update( + preds: Tensor, + target: Tensor, + squared: bool = False, + multiclass_mode: Optional[Union[str, MulticlassMode]] = None, +) -> Tuple[Tensor, Tensor]: + """Updates and returns sum over Hinge loss scores for each observation and the total number of observations. + + Args: + preds: Predicted tensor + target: Ground truth tensor + squared: If True, this will compute the squared hinge loss. Otherwise, computes the regular hinge loss. + multiclass_mode: + Which approach to use for multi-class inputs (has no effect in the binary case). ``None`` (default), + ``MulticlassMode.CRAMMER_SINGER`` or ``"crammer-singer"``, uses the Crammer Singer multi-class hinge loss. + ``MulticlassMode.ONE_VS_ALL`` or ``"one-vs-all"`` computes the hinge loss in a one-vs-all fashion. + """ + preds, target = _input_squeeze(preds, target) + + mode = _check_shape_and_type_consistency_hinge(preds, target) + + if mode == DataType.MULTICLASS: + target = to_onehot(target, max(2, preds.shape[1])).bool() + + if mode == DataType.MULTICLASS and (multiclass_mode is None or multiclass_mode == MulticlassMode.CRAMMER_SINGER): + margin = preds[target] + margin -= B.max(preds[~target].view(preds.shape[0], -1), dim=1)[0] + elif mode == DataType.BINARY or multiclass_mode == MulticlassMode.ONE_VS_ALL: + target = target.bool() + margin = B.zeros_like(preds) + margin[target] = preds[target] + margin[~target] = -preds[~target] + else: + raise ValueError( + "The `multiclass_mode` should be either None / 'crammer-singer' / MulticlassMode.CRAMMER_SINGER" + "(default) or 'one-vs-all' / MulticlassMode.ONE_VS_ALL," + f" got {multiclass_mode}." + ) + + measures = 1 - margin + measures = B.clamp(measures, 0) + + if squared: + measures = measures.pow(2) + + total = tensor(target.shape[0], device=target.device) + return measures.sum(dim=0), total + + +def _hinge_compute(measure: Tensor, total: Tensor) -> Tensor: + """Computes mean Hinge loss. 
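+
+    The mean is taken over observations, i.e. the result is ``measure / total``, where ``measure``
+    is the sum of per-observation hinge losses returned by ``_hinge_update``.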
+ + Args: + measure: Sum over hinge losses for each each observation + total: Number of observations + + Example: + >>> # binary case + >>> target = B.tensor([0, 1, 1]) + >>> preds = B.tensor([-2.2, 2.4, 0.1]) + >>> measure, total = _hinge_update(preds, target) + >>> _hinge_compute(measure, total) + tensor(0.3000) + + >>> # multiclass case + >>> target = B.tensor([0, 1, 2]) + >>> preds = B.tensor([[-1.0, 0.9, 0.2], [0.5, -1.1, 0.8], [2.2, -0.5, 0.3]]) + >>> measure, total = _hinge_update(preds, target) + >>> _hinge_compute(measure, total) + tensor(2.9000) + + >>> # multiclass one-vs-all mode case + >>> target = B.tensor([0, 1, 2]) + >>> preds = B.tensor([[-1.0, 0.9, 0.2], [0.5, -1.1, 0.8], [2.2, -0.5, 0.3]]) + >>> measure, total = _hinge_update(preds, target, multiclass_mode="one-vs-all") + >>> _hinge_compute(measure, total) + tensor([2.2333, 1.5000, 1.2333]) + """ + + return measure / total + + +def hinge( + preds: Tensor, + target: Tensor, + squared: bool = False, + multiclass_mode: Optional[Union[str, MulticlassMode]] = None, +) -> Tensor: + r""" + Computes the mean `Hinge loss`_ typically used for Support Vector Machines (SVMs). + + In the binary case it is defined as: + + .. math:: + \text{Hinge loss} = \max(0, 1 - y \times \hat{y}) + + Where :math:`y \in {-1, 1}` is the target, and :math:`\hat{y} \in \mathbb{R}` is the prediction. + + In the multi-class case, when ``multiclass_mode=None`` (default), ``multiclass_mode=MulticlassMode.CRAMMER_SINGER`` + or ``multiclass_mode="crammer-singer"``, this metric will compute the multi-class hinge loss defined by Crammer and + Singer as: + + .. math:: + \text{Hinge loss} = \max\left(0, 1 - \hat{y}_y + \max_{i \ne y} (\hat{y}_i)\right) + + Where :math:`y \in {0, ..., \mathrm{C}}` is the target class (where :math:`\mathrm{C}` is the number of classes), + and :math:`\hat{y} \in \mathbb{R}^\mathrm{C}` is the predicted output per class. + + In the multi-class case when ``multiclass_mode=MulticlassMode.ONE_VS_ALL`` or ``multiclass_mode='one-vs-all'``, this + metric will use a one-vs-all approach to compute the hinge loss, giving a vector of C outputs where each entry pits + that class against all remaining classes. + + This metric can optionally output the mean of the squared hinge loss by setting ``squared=True`` + + Only accepts inputs with preds shape of (N) (binary) or (N, C) (multi-class) and target shape of (N). + + Args: + preds: Predictions from model (as float outputs from decision function). + target: Ground truth labels. + squared: + If True, this will compute the squared hinge loss. Otherwise, computes the regular hinge loss (default). + multiclass_mode: + Which approach to use for multi-class inputs (has no effect in the binary case). ``None`` (default), + ``MulticlassMode.CRAMMER_SINGER`` or ``"crammer-singer"``, uses the Crammer Singer multi-class hinge loss. + ``MulticlassMode.ONE_VS_ALL`` or ``"one-vs-all"`` computes the hinge loss in a one-vs-all fashion. + + Raises: + ValueError: + If preds shape is not of size (N) or (N, C). + ValueError: + If target shape is not of size (N). + ValueError: + If ``multiclass_mode`` is not: None, ``MulticlassMode.CRAMMER_SINGER``, ``"crammer-singer"``, + ``MulticlassMode.ONE_VS_ALL`` or ``"one-vs-all"``. 
+ + Example (binary case): + >>> import torchapi as B + >>> from paddlemetrics.functional import hinge + >>> target = B.tensor([0, 1, 1]) + >>> preds = B.tensor([-2.2, 2.4, 0.1]) + >>> hinge(preds, target) + tensor(0.3000) + + Example (default / multiclass case): + >>> target = B.tensor([0, 1, 2]) + >>> preds = B.tensor([[-1.0, 0.9, 0.2], [0.5, -1.1, 0.8], [2.2, -0.5, 0.3]]) + >>> hinge(preds, target) + tensor(2.9000) + + Example (multiclass example, one vs all mode): + >>> target = B.tensor([0, 1, 2]) + >>> preds = B.tensor([[-1.0, 0.9, 0.2], [0.5, -1.1, 0.8], [2.2, -0.5, 0.3]]) + >>> hinge(preds, target, multiclass_mode="one-vs-all") + tensor([2.2333, 1.5000, 1.2333]) + """ + measure, total = _hinge_update(preds, target, squared=squared, multiclass_mode=multiclass_mode) + return _hinge_compute(measure, total) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/iou.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/iou.py new file mode 100644 index 00000000..b7cf6077 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/iou.py @@ -0,0 +1,133 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.confusion_matrix import _confusion_matrix_update +from paddlemetrics.utilities.data import get_num_classes +from paddlemetrics.utilities.distributed import reduce + + +def _iou_from_confmat( + confmat: Tensor, + num_classes: int, + ignore_index: Optional[int] = None, + absent_score: float = 0.0, + reduction: str = "elementwise_mean", +) -> Tensor: + """Computes the intersection over union from confusion matrix. + + Args: + confmat: Confusion matrix without normalization + num_classes: Number of classes for a given prediction and target tensor + ignore_index: optional int specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. + absent_score: score to use for an individual class, if no instances of the class index were present in `pred` + AND no instances of the class index were present in `target`. + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + """ + + # Remove the ignored class index from the scores. + if ignore_index is not None and 0 <= ignore_index < num_classes: + confmat[ignore_index] = 0.0 + + intersection = B.diag(confmat) + union = confmat.sum(0) + confmat.sum(1) - intersection + + # If this class is absent in both target AND pred (union == 0), then use the absent_score for this class. 
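+    # Illustration (hypothetical values): with 3 classes and class 1 absent from both `pred` and
+    # `target`, row 1 and column 1 of `confmat` are all zeros, so union[1] == 0 and the score for
+    # class 1 is set to `absent_score` below.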
+ scores = intersection.float() / union.float() + scores[union == 0] = absent_score + + if ignore_index is not None and 0 <= ignore_index < num_classes: + scores = B.cat( + [ + scores[:ignore_index], + scores[ignore_index + 1 :], + ] + ) + + return reduce(scores, reduction=reduction) + + +def iou( + preds: Tensor, + target: Tensor, + ignore_index: Optional[int] = None, + absent_score: float = 0.0, + threshold: float = 0.5, + num_classes: Optional[int] = None, + reduction: str = "elementwise_mean", +) -> Tensor: + r""" + Computes `Jaccard index`_ + + .. math:: J(A,B) = \frac{|A\cap B|}{|A\cup B|} + + Where: :math:`A` and :math:`B` are both tensors of the same size, + containing integer class values. They may be subject to conversion from + input data (see description below). + + Note that it is different from box IoU. + + If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument + to convert into integer labels. This is the case for binary and multi-label probabilities. + + If pred has an extra dimension as in the case of multi-class scores we + perform an argmax on ``dim=1``. + + Args: + preds: tensor containing predictions from model (probabilities, or labels) with shape ``[N, d1, d2, ...]`` + target: tensor containing ground truth labels with shape ``[N, d1, d2, ...]`` + ignore_index: optional int specifying a target class to ignore. If given, + this class index does not contribute to the returned score, regardless + of reduction method. Has no effect if given an int that is not in the + range [0, num_classes-1], where num_classes is either given or derived + from pred and target. By default, no index is ignored, and all classes are used. + absent_score: score to use for an individual class, if no instances of + the class index were present in `pred` AND no instances of the class + index were present in `target`. For example, if we have 3 classes, + [0, 0] for `pred`, and [0, 2] for `target`, then class 1 would be + assigned the `absent_score`. + threshold: + Threshold value for binary or multi-label probabilities. default: 0.5 + num_classes: + Optionally specify the number of classes + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + + Return: + IoU score: Tensor containing single value if reduction is + 'elementwise_mean', or number of classes if reduction is 'none' + + Example: + >>> from paddlemetrics.functional import iou + >>> target = B.randint(0, 2, (10, 25, 25)) + >>> pred = B.tensor(target) + >>> pred[2:5, 7:13, 9:15] = 1 - pred[2:5, 7:13, 9:15] + >>> iou(pred, target) + tensor(0.9660) + """ + + num_classes = get_num_classes(preds=preds, target=target, num_classes=num_classes) + confmat = _confusion_matrix_update(preds, target, num_classes, threshold) + return _iou_from_confmat(confmat, num_classes, ignore_index, absent_score, reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/kl_divergence.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/kl_divergence.py new file mode 100644 index 00000000..0d7685c1 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/kl_divergence.py @@ -0,0 +1,110 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional, Tuple
+
+import paddleext.torchapi as B
+from paddleext.torchapi import Tensor
+
+from paddlemetrics.utilities.checks import _check_same_shape
+from paddlemetrics.utilities.data import METRIC_EPS
+
+
+def _kld_update(p: Tensor, q: Tensor, log_prob: bool) -> Tuple[Tensor, int]:
+    """Updates and returns KL divergence scores for each observation and the total number of observations. Checks
+    same shape and 2D nature of the input tensors else raises ValueError.
+
+    Args:
+        p: data distribution with shape ``[N, d]``
+        q: prior or approximate distribution with shape ``[N, d]``
+        log_prob: bool indicating if input is log-probabilities or probabilities. If given as probabilities,
+            will normalize to make sure the distributions sum to 1
+    """
+    _check_same_shape(p, q)
+    if p.ndim != 2 or q.ndim != 2:
+        raise ValueError(f"Expected both p and q distribution to be 2D but got {p.ndim} and {q.ndim} respectively")
+
+    total = p.shape[0]
+    if log_prob:
+        measures = B.sum(p.exp() * (p - q), axis=-1)
+    else:
+        p = p / p.sum(axis=-1, keepdim=True)
+        q = q / q.sum(axis=-1, keepdim=True)
+        q = B.clamp(q, METRIC_EPS)
+        measures = B.sum(p * B.log(p / q), axis=-1)
+
+    return measures, total
+
+
+def _kld_compute(measures: Tensor, total: Tensor, reduction: Optional[str] = "mean") -> Tensor:
+    """Computes the KL divergence based on the type of reduction.
+
+    Args:
+        measures: Tensor of KL divergence scores for each observation
+        total: Number of observations
+        reduction:
+            Determines how to reduce over the ``N``/batch dimension:
+
+            - ``'mean'`` [default]: Averages score across samples
+            - ``'sum'``: Sum score across samples
+            - ``'none'`` or ``None``: Returns score per sample
+
+    Example:
+        >>> p = B.tensor([[0.36, 0.48, 0.16]])
+        >>> q = B.tensor([[1/3, 1/3, 1/3]])
+        >>> measures, total = _kld_update(p, q, log_prob=False)
+        >>> _kld_compute(measures, total)
+        tensor(0.0853)
+    """
+
+    if reduction == "sum":
+        return measures.sum()
+    if reduction == "mean":
+        return measures.sum() / total
+    if reduction is None or reduction == "none":
+        return measures
+    return measures / total
+
+
+def kl_divergence(p: Tensor, q: Tensor, log_prob: bool = False, reduction: Optional[str] = "mean") -> Tensor:
+    r"""Computes `KL divergence`_
+
+    .. math::
+        D_{KL}(P||Q) = \sum_{x\in\mathcal{X}} P(x) \log\frac{P(x)}{Q(x)}
+
+    Where :math:`P` and :math:`Q` are probability distributions where :math:`P` usually represents a distribution
+    over data and :math:`Q` is often a prior or approximation of :math:`P`. It should be noted that the KL divergence
+    is a non-symmetric metric i.e. :math:`D_{KL}(P||Q) \neq D_{KL}(Q||P)`.
+
+    Args:
+        p: data distribution with shape ``[N, d]``
+        q: prior or approximate distribution with shape ``[N, d]``
+        log_prob: bool indicating if input is log-probabilities or probabilities. If given as probabilities,
+            will normalize to make sure the distributions sum to 1
+        reduction:
+            Determines how to reduce over the ``N``/batch dimension:
+
+            - ``'mean'`` [default]: Averages score across samples
+            - ``'sum'``: Sum score across samples
+            - ``'none'`` or ``None``: Returns score per sample
+
+    Example:
+        >>> import paddleext.torchapi as B
+        >>> p = B.tensor([[0.36, 0.48, 0.16]])
+        >>> q = B.tensor([[1/3, 1/3, 1/3]])
+        >>> kl_divergence(p, q)
+        tensor(0.0853)
+    """
+    measures, total = _kld_update(p, q, log_prob)
+    return _kld_compute(measures, total, reduction)
diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/matthews_corrcoef.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/matthews_corrcoef.py
new file mode 100644
index 00000000..8532a358
--- /dev/null
+++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/matthews_corrcoef.py
@@ -0,0 +1,78 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import paddleext.torchapi as B
+from paddleext.torchapi import Tensor
+
+from paddlemetrics.functional.classification.confusion_matrix import _confusion_matrix_update
+
+_matthews_corrcoef_update = _confusion_matrix_update
+
+
+def _matthews_corrcoef_compute(confmat: Tensor) -> Tensor:
+    """Computes Matthews correlation coefficient.
+
+    Args:
+        confmat: Confusion matrix
+
+    Example:
+        >>> target = B.tensor([1, 1, 0, 0])
+        >>> preds = B.tensor([0, 1, 0, 0])
+        >>> confmat = _matthews_corrcoef_update(preds, target, num_classes=2)
+        >>> _matthews_corrcoef_compute(confmat)
+        tensor(0.5774)
+    """
+
+    tk = confmat.sum(dim=1).float()
+    pk = confmat.sum(dim=0).float()
+    c = B.trace(confmat).float()
+    s = confmat.sum().float()
+    return (c * s - sum(tk * pk)) / (B.sqrt(s ** 2 - sum(pk * pk)) * B.sqrt(s ** 2 - sum(tk * tk)))
+
+
+def matthews_corrcoef(
+    preds: Tensor,
+    target: Tensor,
+    num_classes: int,
+    threshold: float = 0.5,
+) -> Tensor:
+    r"""
+    Calculates `Matthews correlation coefficient`_ that measures
+    the general correlation or quality of a classification. In the binary case it
+    is defined as:
+
+    .. math::
+        MCC = \frac{TP*TN - FP*FN}{\sqrt{(TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)}}
+
+    where TP, TN, FP and FN are respectively the true positives, true negatives,
+    false positives and false negatives. Also works in the case of multi-label or
+    multi-class input.
+
+    Args:
+        preds: (float or long tensor), Either a ``(N, ...)`` tensor with labels or
+            ``(N, C, ...)`` where C is the number of classes, tensor with labels/probabilities
+        target: ``target`` (long tensor), tensor with shape ``(N, ...)`` with ground truth labels
+        num_classes: Number of classes in the dataset.
+        threshold:
+            Threshold value for binary or multi-label probabilities.
default: 0.5 + + Example: + >>> from paddlemetrics.functional import matthews_corrcoef + >>> target = B.tensor([1, 1, 0, 0]) + >>> preds = B.tensor([0, 1, 0, 0]) + >>> matthews_corrcoef(preds, target, num_classes=2) + tensor(0.5774) + + """ + confmat = _matthews_corrcoef_update(preds, target, num_classes, threshold) + return _matthews_corrcoef_compute(confmat) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/precision_recall.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/precision_recall.py new file mode 100644 index 00000000..4b8528dc --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/precision_recall.py @@ -0,0 +1,568 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional, Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.stat_scores import _reduce_stat_scores, _stat_scores_update +from paddlemetrics.utilities.enums import AverageMethod, MDMCAverageMethod + + +def _precision_compute( + tp: Tensor, + fp: Tensor, + fn: Tensor, + average: str, + mdmc_average: Optional[str], +) -> Tensor: + """Computes precision from the stat scores: true positives, false positives, true negatives, false negatives. + + Args: + tp: True positives + fp: False positives + fn: False negatives + average: Defines the reduction that is applied + mdmc_average: Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter) + + Example: + >>> from paddlemetrics.functional.classification.stat_scores import _stat_scores_update + >>> preds = B.tensor([2, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> tp, fp, tn, fn = _stat_scores_update( preds, target, reduce='macro', num_classes=3) + >>> _precision_compute(tp, fp, fn, average='macro', mdmc_average=None) + tensor(0.1667) + >>> tp, fp, tn, fn = _stat_scores_update(preds, target, reduce='micro') + >>> _precision_compute(tp, fp, fn, average='micro', mdmc_average=None) + tensor(0.2500) + """ + + numerator = tp + denominator = tp + fp + + if average == AverageMethod.MACRO and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + cond = tp + fp + fn == 0 + numerator = numerator[~cond] + denominator = denominator[~cond] + + if average == AverageMethod.NONE and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + # a class is not present if there exists no TPs, no FPs, and no FNs + meaningless_indeces = B.nonzero((tp | fn | fp) == 0).cpu() + numerator[meaningless_indeces, ...] = -1 + denominator[meaningless_indeces, ...] 
= -1 + + return _reduce_stat_scores( + numerator=numerator, + denominator=denominator, + weights=None if average != "weighted" else tp + fn, + average=average, + mdmc_average=mdmc_average, + ) + + +def precision( + preds: Tensor, + target: Tensor, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + num_classes: Optional[int] = None, + threshold: float = 0.5, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, +) -> Tensor: + r""" + Computes `Precision`_ + + .. math:: \text{Precision} = \frac{\text{TP}}{\text{TP} + \text{FP}} + + Where :math:`\text{TP}` and :math:`\text{FP}` represent the number of true positives and + false positives respecitively. With the use of ``top_k`` parameter, this metric can + generalize to Precision@K. + + The reduction method (how the precision scores are aggregated) is controlled by the + ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the + multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + preds: Predictions from model (probabilities, logits or labels) + target: Ground truth values + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + .. note:: If ``'none'`` and a given class doesn't occur in the `preds` or `target`, + the value for the class will be ``nan``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. 
+ + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + Return: + The shape of the returned tensor depends on the ``average`` parameter + + - If ``average in ['micro', 'macro', 'weighted', 'samples']``, a one-element tensor will be returned + - If ``average in ['none', None]``, the shape will be ``(C,)``, where ``C`` stands for the number + of classes + + Raises: + ValueError: + If ``average`` is not one of ``"micro"``, ``"macro"``, ``"weighted"``, + ``"samples"``, ``"none"`` or ``None``. + ValueError: + If ``mdmc_average`` is not one of ``None``, ``"samplewise"``, ``"global"``. + ValueError: + If ``average`` is set but ``num_classes`` is not provided. + ValueError: + If ``num_classes`` is set + and ``ignore_index`` is not in the range ``[0, num_classes)``. + + Example: + >>> from paddlemetrics.functional import precision + >>> preds = B.tensor([2, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> precision(preds, target, average='macro', num_classes=3) + tensor(0.1667) + >>> precision(preds, target, average='micro') + tensor(0.2500) + + """ + allowed_average = ["micro", "macro", "weighted", "samples", "none", None] + if average not in allowed_average: + raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") + + allowed_mdmc_average = [None, "samplewise", "global"] + if mdmc_average not in allowed_mdmc_average: + raise ValueError(f"The `mdmc_average` has to be one of {allowed_mdmc_average}, got {mdmc_average}.") + + if average in ["macro", "weighted", "none", None] and (not num_classes or num_classes < 1): + raise ValueError(f"When you set `average` as {average}, you have to provide the number of classes.") + + if num_classes and ignore_index is not None and (not 0 <= ignore_index < num_classes or num_classes == 1): + raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {num_classes} classes") + + reduce = "macro" if average in ["weighted", "none", None] else average + tp, fp, _, fn = _stat_scores_update( + preds, + target, + reduce=reduce, + mdmc_reduce=mdmc_average, + threshold=threshold, + num_classes=num_classes, + top_k=top_k, + multiclass=multiclass, + ignore_index=ignore_index, + ) + + return _precision_compute(tp, fp, fn, average, mdmc_average) + + +def _recall_compute( + tp: Tensor, + fp: Tensor, + fn: Tensor, + average: str, + mdmc_average: Optional[str], +) -> Tensor: + """Computes precision from the stat scores: true positives, false positives, true negatives, false negatives. 
+ + Args: + tp: True positives + fp: False positives + fn: False negatives + average: Defines the reduction that is applied + mdmc_average: Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter) + + Example: + >>> from paddlemetrics.functional.classification.stat_scores import _stat_scores_update + >>> preds = B.tensor([2, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> tp, fp, tn, fn = _stat_scores_update(preds, target, reduce='macro', num_classes=3) + >>> _recall_compute(tp, fp, fn, average='macro', mdmc_average=None) + tensor(0.3333) + >>> tp, fp, tn, fn = _stat_scores_update(preds, target, reduce='micro') + >>> _recall_compute(tp, fp, fn, average='micro', mdmc_average=None) + tensor(0.2500) + """ + numerator = tp + denominator = tp + fn + + if average == AverageMethod.MACRO and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + cond = tp + fp + fn == 0 + numerator = numerator[~cond] + denominator = denominator[~cond] + + if average == AverageMethod.NONE and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + # a class is not present if there exists no TPs, no FPs, and no FNs + meaningless_indeces = ((tp | fn | fp) == 0).nonzero().cpu() + numerator[meaningless_indeces, ...] = -1 + denominator[meaningless_indeces, ...] = -1 + + return _reduce_stat_scores( + numerator=numerator, + denominator=denominator, + weights=None if average != AverageMethod.WEIGHTED else tp + fn, + average=average, + mdmc_average=mdmc_average, + ) + + +def recall( + preds: Tensor, + target: Tensor, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + num_classes: Optional[int] = None, + threshold: float = 0.5, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, +) -> Tensor: + r""" + Computes `Recall`_ + + .. math:: \text{Recall} = \frac{\text{TP}}{\text{TP} + \text{FN}} + + Where :math:`\text{TP}` and :math:`\text{FN}` represent the number of true positives and + false negatives respecitively. With the use of ``top_k`` parameter, this metric can + generalize to Recall@K. + + The reduction method (how the recall scores are aggregated) is controlled by the + ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the + multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + preds: Predictions from model (probabilities, logits or labels) + target: Ground truth values + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tp + fn``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. + - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + .. note:: If ``'none'`` and a given class doesn't occur in the `preds` or `target`, + the value for the class will be ``nan``. 
+ + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + Return: + The shape of the returned tensor depends on the ``average`` parameter + + - If ``average in ['micro', 'macro', 'weighted', 'samples']``, a one-element tensor will be returned + - If ``average in ['none', None]``, the shape will be ``(C,)``, where ``C`` stands for the number + of classes + + Raises: + ValueError: + If ``average`` is not one of ``"micro"``, ``"macro"``, ``"weighted"``, + ``"samples"``, ``"none"`` or ``None``. + ValueError: + If ``mdmc_average`` is not one of ``None``, ``"samplewise"``, ``"global"``. + ValueError: + If ``average`` is set but ``num_classes`` is not provided. + ValueError: + If ``num_classes`` is set + and ``ignore_index`` is not in the range ``[0, num_classes)``. 
+
+    Example:
+        >>> from paddlemetrics.functional import recall
+        >>> preds = B.tensor([2, 0, 2, 1])
+        >>> target = B.tensor([1, 1, 2, 0])
+        >>> recall(preds, target, average='macro', num_classes=3)
+        tensor(0.3333)
+        >>> recall(preds, target, average='micro')
+        tensor(0.2500)
+
+    """
+    allowed_average = ["micro", "macro", "weighted", "samples", "none", None]
+    if average not in allowed_average:
+        raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.")
+
+    allowed_mdmc_average = [None, "samplewise", "global"]
+    if mdmc_average not in allowed_mdmc_average:
+        raise ValueError(f"The `mdmc_average` has to be one of {allowed_mdmc_average}, got {mdmc_average}.")
+
+    if average in ["macro", "weighted", "none", None] and (not num_classes or num_classes < 1):
+        raise ValueError(f"When you set `average` as {average}, you have to provide the number of classes.")
+
+    if num_classes and ignore_index is not None and (not 0 <= ignore_index < num_classes or num_classes == 1):
+        raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {num_classes} classes")
+
+    reduce = "macro" if average in ["weighted", "none", None] else average
+    tp, fp, _, fn = _stat_scores_update(
+        preds,
+        target,
+        reduce=reduce,
+        mdmc_reduce=mdmc_average,
+        threshold=threshold,
+        num_classes=num_classes,
+        top_k=top_k,
+        multiclass=multiclass,
+        ignore_index=ignore_index,
+    )
+
+    return _recall_compute(tp, fp, fn, average, mdmc_average)
+
+
+def precision_recall(
+    preds: Tensor,
+    target: Tensor,
+    average: str = "micro",
+    mdmc_average: Optional[str] = None,
+    ignore_index: Optional[int] = None,
+    num_classes: Optional[int] = None,
+    threshold: float = 0.5,
+    top_k: Optional[int] = None,
+    multiclass: Optional[bool] = None,
+) -> Tuple[Tensor, Tensor]:
+    r"""
+    Computes `Precision`_ and `Recall`_
+
+    .. math:: \text{Precision} = \frac{\text{TP}}{\text{TP} + \text{FP}}
+
+
+    .. math:: \text{Recall} = \frac{\text{TP}}{\text{TP} + \text{FN}}
+
+    Where :math:`\text{TP}`, :math:`\text{FN}` and :math:`\text{FP}` represent the number
+    of true positives, false negatives and false positives respectively. With the use of
+    ``top_k`` parameter, this metric can generalize to Recall@K and Precision@K.
+
+    The reduction method (how the recall scores are aggregated) is controlled by the
+    ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the
+    multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`.
+
+    Args:
+        preds: Predictions from model (probabilities, logits or labels)
+        target: Ground truth values
+        average:
+            Defines the reduction that is applied. Should be one of the following:
+
+            - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes.
+            - ``'macro'``: Calculate the metric for each class separately, and average the
+              metrics across classes (with equal weights for each class).
+            - ``'weighted'``: Calculate the metric for each class separately, and average the
+              metrics across classes, weighting each class by its support (``tp + fn``).
+            - ``'none'`` or ``None``: Calculate the metric for each class separately, and return
+              the metric for every class.
+            - ``'samples'``: Calculate the metric for each sample, and average the metrics
+              across samples (with equal weights for each sample).
+
+            .. note:: What is considered a sample in the multi-dimensional multi-class case
+                depends on the value of ``mdmc_average``.
+
+            ..
note:: If ``'none'`` and a given class doesn't occur in the `preds` or `target`, + the value for the class will be ``nan``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + Return: + The function returns a tuple with two elements: precision and recall. Their shape + depends on the ``average`` parameter + + - If ``average in ['micro', 'macro', 'weighted', 'samples']``, they are a single element tensor + - If ``average in ['none', None]``, they are a tensor of shape ``(C, )``, where ``C`` stands for + the number of classes + + Raises: + ValueError: + If ``average`` is not one of ``"micro"``, ``"macro"``, ``"weighted"``, + ``"samples"``, ``"none"`` or ``None``. + ValueError: + If ``mdmc_average`` is not one of ``None``, ``"samplewise"``, ``"global"``. + ValueError: + If ``average`` is set but ``num_classes`` is not provided. + ValueError: + If ``num_classes`` is set + and ``ignore_index`` is not in the range ``[0, num_classes)``. 
+ + Example: + >>> from paddlemetrics.functional import precision_recall + >>> preds = B.tensor([2, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> precision_recall(preds, target, average='macro', num_classes=3) + (tensor(0.1667), tensor(0.3333)) + >>> precision_recall(preds, target, average='micro') + (tensor(0.2500), tensor(0.2500)) + + """ + allowed_average = ["micro", "macro", "weighted", "samples", "none", None] + if average not in allowed_average: + raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") + + allowed_mdmc_average = [None, "samplewise", "global"] + if mdmc_average not in allowed_mdmc_average: + raise ValueError("The `mdmc_average` has to be one of {allowed_mdmc_average}, got {mdmc_average}.") + + if average in ["macro", "weighted", "none", None] and (not num_classes or num_classes < 1): + raise ValueError(f"When you set `average` as {average}, you have to provide the number of classes.") + + if num_classes and ignore_index is not None and (not 0 <= ignore_index < num_classes or num_classes == 1): + raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {num_classes} classes") + + reduce = "macro" if average in ["weighted", "none", None] else average + tp, fp, _, fn = _stat_scores_update( + preds, + target, + reduce=reduce, + mdmc_reduce=mdmc_average, + threshold=threshold, + num_classes=num_classes, + top_k=top_k, + multiclass=multiclass, + ignore_index=ignore_index, + ) + + precision_ = _precision_compute(tp, fp, fn, average, mdmc_average) + recall_ = _recall_compute(tp, fp, fn, average, mdmc_average) + + return precision_, recall_ diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/precision_recall_curve.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/precision_recall_curve.py new file mode 100644 index 00000000..11b32500 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/precision_recall_curve.py @@ -0,0 +1,332 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
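+
+# Usage sketch for the helpers defined in this module (assuming `precision_recall_curve` is
+# exported from `paddlemetrics.functional` like the other functional metrics; the tensor values
+# are illustrative only):
+#
+#     import paddleext.torchapi as B
+#     from paddlemetrics.functional import precision_recall_curve
+#
+#     preds = B.tensor([0.1, 0.4, 0.35, 0.8])
+#     target = B.tensor([0, 0, 1, 1])
+#     precision, recall, thresholds = precision_recall_curve(preds, target)
+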
+from typing import List, Optional, Sequence, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities import rank_zero_warn + + +def _binary_clf_curve( + preds: Tensor, + target: Tensor, + sample_weights: Optional[Sequence] = None, + pos_label: int = 1, +) -> Tuple[Tensor, Tensor, Tensor]: + """adapted from https://github.com/scikit-learn/scikit- learn/blob/master/sklearn/metrics/_ranking.py.""" + if sample_weights is not None and not isinstance(sample_weights, Tensor): + sample_weights = tensor(sample_weights, device=preds.device, dtype=B.float) + + # remove class dimension if necessary + if preds.ndim > target.ndim: + preds = preds[:, 0] + desc_score_indices = B.argsort(preds, descending=True) + + preds = preds[desc_score_indices] + target = target[desc_score_indices] + + if sample_weights is not None: + weight = sample_weights[desc_score_indices] + else: + weight = 1.0 + + # pred typically has many tied values. Here we extract + # the indices associated with the distinct values. We also + # concatenate a value for the end of the curve. + distinct_value_indices = B.where(preds[1:] - preds[:-1])[0] + threshold_idxs = B.nn.functional.pad(distinct_value_indices, [0, 1], value=target.size(0) - 1) + target = (target == pos_label).to(B.long) + tps = B.cumsum(target * weight, dim=0)[threshold_idxs] + + if sample_weights is not None: + # express fps as a cumsum to ensure fps is increasing even in + # the presence of floating point errors + fps = B.cumsum((1 - target) * weight, dim=0)[threshold_idxs] + else: + fps = 1 + threshold_idxs - tps + + return fps, tps, preds[threshold_idxs] + + +def _precision_recall_curve_update( + preds: Tensor, + target: Tensor, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, +) -> Tuple[Tensor, Tensor, int, Optional[int]]: + """Updates and returns variables required to compute the precision-recall pairs for different thresholds. + + Args: + preds: Predicted tensor + target: Ground truth tensor + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + """ + + if len(preds.shape) == len(target.shape): + if pos_label is None: + pos_label = 1 + if num_classes is not None and num_classes != 1: + # multilabel problem + if num_classes != preds.shape[1]: + raise ValueError( + f"Argument `num_classes` was set to {num_classes} in" + f" metric `precision_recall_curve` but detected {preds.shape[1]}" + " number of classes from predictions" + ) + preds = preds.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1) + target = target.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1) + else: + # binary problem + preds = preds.flatten() + target = target.flatten() + num_classes = 1 + + # multi class problem + elif len(preds.shape) == len(target.shape) + 1: + if pos_label is not None: + rank_zero_warn( + "Argument `pos_label` should be `None` when running" + f" multiclass precision recall curve. 
Got {pos_label}" + ) + if num_classes != preds.shape[1]: + raise ValueError( + f"Argument `num_classes` was set to {num_classes} in" + f" metric `precision_recall_curve` but detected {preds.shape[1]}" + " number of classes from predictions" + ) + preds = preds.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1) + target = target.flatten() + + else: + raise ValueError("preds and target must have same number of dimensions, or one additional dimension for preds") + + return preds, target, num_classes, pos_label + + +def _precision_recall_curve_compute_single_class( + preds: Tensor, + target: Tensor, + pos_label: int, + sample_weights: Optional[Sequence] = None, +) -> Tuple[Tensor, Tensor, Tensor]: + """Computes precision-recall pairs for single class inputs. + + Args: + preds: Predicted tensor + target: Ground truth tensor + pos_label: integer determining the positive class. + sample_weights: sample weights for each data point + """ + + fps, tps, thresholds = _binary_clf_curve( + preds=preds, target=target, sample_weights=sample_weights, pos_label=pos_label + ) + precision = tps / (tps + fps) + recall = tps / tps[-1] + + # stop when full recall attained and reverse the outputs so recall is decreasing + last_ind = B.where(tps == tps[-1])[0][0] + sl = slice(0, last_ind.item() + 1) + + # need to call reversed explicitly, since including that to slice would + # introduce negative strides that are not yet supported in pytorch + precision = B.cat([reversed(precision[sl]), B.ones(1, dtype=precision.dtype, device=precision.device)]) + + recall = B.cat([reversed(recall[sl]), B.zeros(1, dtype=recall.dtype, device=recall.device)]) + + thresholds = reversed(thresholds[sl]).detach().clone() # type: ignore + + return precision, recall, thresholds + + +def _precision_recall_curve_compute_multi_class( + preds: Tensor, + target: Tensor, + num_classes: int, + sample_weights: Optional[Sequence] = None, +) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]: + """Computes precision-recall pairs for multi class inputs. + + Args: + preds: Predicted tensor + target: Ground truth tensor + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + sample_weights: sample weights for each data point + """ + + # Recursively call per class + precision, recall, thresholds = [], [], [] + for cls in range(num_classes): + preds_cls = preds[:, cls] + + prc_args = dict( + preds=preds_cls, + target=target, + num_classes=1, + pos_label=cls, + sample_weights=sample_weights, + ) + if target.ndim > 1: + prc_args.update( + dict( + target=target[:, cls], + pos_label=1, + ) + ) + res = precision_recall_curve(**prc_args) + precision.append(res[0]) + recall.append(res[1]) + thresholds.append(res[2]) + + return precision, recall, thresholds + + +def _precision_recall_curve_compute( + preds: Tensor, + target: Tensor, + num_classes: int, + pos_label: Optional[int] = None, + sample_weights: Optional[Sequence] = None, +) -> Union[Tuple[Tensor, Tensor, Tensor], Tuple[List[Tensor], List[Tensor], List[Tensor]]]: + """Computes precision-recall pairs based on the number of classes. + + Args: + preds: Predicted tensor + target: Ground truth tensor + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. 
For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + sample_weights: sample weights for each data point + + Example: + >>> # binary case + >>> preds = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 0]) + >>> pos_label = 1 + >>> preds, target, num_classes, pos_label = _precision_recall_curve_update(preds, target, pos_label=pos_label) + >>> precision, recall, thresholds = _precision_recall_curve_compute(preds, target, num_classes, pos_label) + >>> precision + tensor([0.6667, 0.5000, 0.0000, 1.0000]) + >>> recall + tensor([1.0000, 0.5000, 0.0000, 0.0000]) + >>> thresholds + tensor([1, 2, 3]) + + >>> # multiclass case + >>> preds = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> num_classes = 5 + >>> preds, target, num_classes, pos_label = _precision_recall_curve_update(preds, target, num_classes) + >>> precision, recall, thresholds = _precision_recall_curve_compute(preds, target, num_classes) + >>> precision # doctest: +NORMALIZE_WHITESPACE + [tensor([1., 1.]), tensor([1., 1.]), tensor([0.2500, 0.0000, 1.0000]), + tensor([0.2500, 0.0000, 1.0000]), tensor([0., 1.])] + >>> recall + [tensor([1., 0.]), tensor([1., 0.]), tensor([1., 0., 0.]), tensor([1., 0., 0.]), tensor([nan, 0.])] + >>> thresholds + [tensor([0.7500]), tensor([0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500])] + """ + + with B.no_grad(): + if num_classes == 1: + if pos_label is None: + pos_label = 1 + return _precision_recall_curve_compute_single_class(preds, target, pos_label, sample_weights) + return _precision_recall_curve_compute_multi_class(preds, target, num_classes, sample_weights) + + +def precision_recall_curve( + preds: Tensor, + target: Tensor, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + sample_weights: Optional[Sequence] = None, +) -> Union[Tuple[Tensor, Tensor, Tensor], Tuple[List[Tensor], List[Tensor], List[Tensor]]]: + """Computes precision-recall pairs for different thresholds. + + Args: + preds: predictions from model (probabilities) + target: ground truth labels + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + sample_weights: sample weights for each data point + + Returns: + 3-element tuple containing + + precision: + tensor where element i is the precision of predictions with + score >= thresholds[i] and the last element is 1. + If multiclass, this is a list of such tensors, one for each class. + recall: + tensor where element i is the recall of predictions with + score >= thresholds[i] and the last element is 0. + If multiclass, this is a list of such tensors, one for each class. + thresholds: + Thresholds used for computing precision/recall scores + + Raises: + ValueError: + If ``preds`` and ``target`` don't have the same number of dimensions, + or one additional dimension for ``preds``. + ValueError: + If the number of classes deduced from ``preds`` is not the same as the + ``num_classes`` provided. 
+ + Example (binary case): + >>> from paddlemetrics.functional import precision_recall_curve + >>> pred = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 0]) + >>> precision, recall, thresholds = precision_recall_curve(pred, target, pos_label=1) + >>> precision + tensor([0.6667, 0.5000, 0.0000, 1.0000]) + >>> recall + tensor([1.0000, 0.5000, 0.0000, 0.0000]) + >>> thresholds + tensor([1, 2, 3]) + + Example (multiclass case): + >>> pred = B.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.05, 0.75, 0.05]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> precision, recall, thresholds = precision_recall_curve(pred, target, num_classes=5) + >>> precision # doctest: +NORMALIZE_WHITESPACE + [tensor([1., 1.]), tensor([1., 1.]), tensor([0.2500, 0.0000, 1.0000]), + tensor([0.2500, 0.0000, 1.0000]), tensor([0., 1.])] + >>> recall + [tensor([1., 0.]), tensor([1., 0.]), tensor([1., 0., 0.]), tensor([1., 0., 0.]), tensor([nan, 0.])] + >>> thresholds + [tensor([0.7500]), tensor([0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500])] + """ + preds, target, num_classes, pos_label = _precision_recall_curve_update(preds, target, num_classes, pos_label) + return _precision_recall_curve_compute(preds, target, num_classes, pos_label, sample_weights) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/roc.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/roc.py new file mode 100644 index 00000000..86f4e2a4 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/roc.py @@ -0,0 +1,273 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import List, Optional, Sequence, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.precision_recall_curve import ( + _binary_clf_curve, + _precision_recall_curve_update, +) + + +def _roc_update( + preds: Tensor, + target: Tensor, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, +) -> Tuple[Tensor, Tensor, int, Optional[int]]: + """Updates and returns variables required to compute the Receiver Operating Characteristic. + + Args: + preds: Predicted tensor + target: Ground truth tensor + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. 
For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + """ + + return _precision_recall_curve_update(preds, target, num_classes, pos_label) + + +def _roc_compute_single_class( + preds: Tensor, + target: Tensor, + pos_label: int, + sample_weights: Optional[Sequence] = None, +) -> Tuple[Tensor, Tensor, Tensor]: + """Computes Receiver Operating Characteristic for single class inputs. Returns tensor with false positive + rates, tensor with true positive rates, tensor with thresholds used for computing false- and true postive + rates. + + Args: + preds: Predicted tensor + target: Ground truth tensor + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + sample_weights: sample weights for each data point + """ + + fps, tps, thresholds = _binary_clf_curve( + preds=preds, target=target, sample_weights=sample_weights, pos_label=pos_label + ) + # Add an extra threshold position to make sure that the curve starts at (0, 0) + tps = B.cat([B.zeros(1, dtype=tps.dtype, device=tps.device), tps]) + fps = B.cat([B.zeros(1, dtype=fps.dtype, device=fps.device), fps]) + thresholds = B.cat([thresholds[0][None] + 1, thresholds]) + + if fps[-1] <= 0: + raise ValueError("No negative samples in targets, false positive value should be meaningless") + fpr = fps / fps[-1] + + if tps[-1] <= 0: + raise ValueError("No positive samples in targets, true positive value should be meaningless") + tpr = tps / tps[-1] + + return fpr, tpr, thresholds + + +def _roc_compute_multi_class( + preds: Tensor, + target: Tensor, + num_classes: int, + sample_weights: Optional[Sequence] = None, +) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]: + """Computes Receiver Operating Characteristic for multi class inputs. Returns tensor with false positive rates, + tensor with true positive rates, tensor with thresholds used for computing false- and true postive rates. + + Args: + preds: Predicted tensor + target: Ground truth tensor + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + sample_weights: sample weights for each data point + """ + + fpr, tpr, thresholds = [], [], [] + for cls in range(num_classes): + if preds.shape == target.shape: + target_cls = target[:, cls] + pos_label = 1 + else: + target_cls = target + pos_label = cls + res = roc( + preds=preds[:, cls], + target=target_cls, + num_classes=1, + pos_label=pos_label, + sample_weights=sample_weights, + ) + fpr.append(res[0]) + tpr.append(res[1]) + thresholds.append(res[2]) + + return fpr, tpr, thresholds + + +def _roc_compute( + preds: Tensor, + target: Tensor, + num_classes: int, + pos_label: Optional[int] = None, + sample_weights: Optional[Sequence] = None, +) -> Union[Tuple[Tensor, Tensor, Tensor], Tuple[List[Tensor], List[Tensor], List[Tensor]]]: + """Computes Receiver Operating Characteristic based on the number of classes. + + Args: + preds: Predicted tensor + target: Ground truth tensor + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. 
For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + sample_weights: sample weights for each data point + + Example: + >>> # binary case + >>> preds = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 1]) + >>> pos_label = 1 + >>> preds, target, num_classes, pos_label = _roc_update(preds, target, pos_label=pos_label) + >>> fpr, tpr, thresholds = _roc_compute(preds, target, num_classes, pos_label) + >>> fpr + tensor([0., 0., 0., 0., 1.]) + >>> tpr + tensor([0.0000, 0.3333, 0.6667, 1.0000, 1.0000]) + >>> thresholds + tensor([4, 3, 2, 1, 0]) + + >>> # multiclass case + >>> preds = B.tensor([[0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05], + ... [0.05, 0.05, 0.05, 0.75]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> num_classes = 4 + >>> preds, target, num_classes, pos_label = _roc_update(preds, target, num_classes) + >>> fpr, tpr, thresholds = _roc_compute(preds, target, num_classes) + >>> fpr + [tensor([0., 0., 1.]), tensor([0., 0., 1.]), tensor([0.0000, 0.3333, 1.0000]), tensor([0.0000, 0.3333, 1.0000])] + >>> tpr + [tensor([0., 1., 1.]), tensor([0., 1., 1.]), tensor([0., 0., 1.]), tensor([0., 0., 1.])] + >>> thresholds # doctest: +NORMALIZE_WHITESPACE + [tensor([1.7500, 0.7500, 0.0500]), + tensor([1.7500, 0.7500, 0.0500]), + tensor([1.7500, 0.7500, 0.0500]), + tensor([1.7500, 0.7500, 0.0500])] + """ + + with B.no_grad(): + if num_classes == 1 and preds.ndim == 1: # binary + if pos_label is None: + pos_label = 1 + return _roc_compute_single_class(preds, target, pos_label, sample_weights) + return _roc_compute_multi_class(preds, target, num_classes, sample_weights) + + +def roc( + preds: Tensor, + target: Tensor, + num_classes: Optional[int] = None, + pos_label: Optional[int] = None, + sample_weights: Optional[Sequence] = None, +) -> Union[Tuple[Tensor, Tensor, Tensor], Tuple[List[Tensor], List[Tensor], List[Tensor]]]: + """Computes the Receiver Operating Characteristic (ROC). Works with both binary, multiclass and multilabel + input. + + Args: + preds: predictions from model (logits or probabilities) + target: ground truth values + num_classes: integer with number of classes for multi-label and multiclass problems. + Should be set to ``None`` for binary problems + pos_label: integer determining the positive class. Default is ``None`` + which for binary problem is translate to 1. For multiclass problems + this argument should not be set as we iteratively change it in the + range [0,num_classes-1] + sample_weights: sample weights for each data point + + Returns: + 3-element tuple containing + + fpr: + tensor with false positive rates. + If multiclass or multilabel, this is a list of such tensors, one for each class/label. + tpr: + tensor with true positive rates. + If multiclass or multilabel, this is a list of such tensors, one for each class/label. + thresholds: + tensor with thresholds used for computing false- and true postive rates + If multiclass or multilabel, this is a list of such tensors, one for each class/label. 
+ + Example (binary case): + >>> from paddlemetrics.functional import roc + >>> pred = B.tensor([0, 1, 2, 3]) + >>> target = B.tensor([0, 1, 1, 1]) + >>> fpr, tpr, thresholds = roc(pred, target, pos_label=1) + >>> fpr + tensor([0., 0., 0., 0., 1.]) + >>> tpr + tensor([0.0000, 0.3333, 0.6667, 1.0000, 1.0000]) + >>> thresholds + tensor([4, 3, 2, 1, 0]) + + Example (multiclass case): + >>> from paddlemetrics.functional import roc + >>> pred = B.tensor([[0.75, 0.05, 0.05, 0.05], + ... [0.05, 0.75, 0.05, 0.05], + ... [0.05, 0.05, 0.75, 0.05], + ... [0.05, 0.05, 0.05, 0.75]]) + >>> target = B.tensor([0, 1, 3, 2]) + >>> fpr, tpr, thresholds = roc(pred, target, num_classes=4) + >>> fpr + [tensor([0., 0., 1.]), tensor([0., 0., 1.]), tensor([0.0000, 0.3333, 1.0000]), tensor([0.0000, 0.3333, 1.0000])] + >>> tpr + [tensor([0., 1., 1.]), tensor([0., 1., 1.]), tensor([0., 0., 1.]), tensor([0., 0., 1.])] + >>> thresholds # doctest: +NORMALIZE_WHITESPACE + [tensor([1.7500, 0.7500, 0.0500]), + tensor([1.7500, 0.7500, 0.0500]), + tensor([1.7500, 0.7500, 0.0500]), + tensor([1.7500, 0.7500, 0.0500])] + + Example (multilabel case): + >>> from paddlemetrics.functional import roc + >>> pred = B.tensor([[0.8191, 0.3680, 0.1138], + ... [0.3584, 0.7576, 0.1183], + ... [0.2286, 0.3468, 0.1338], + ... [0.8603, 0.0745, 0.1837]]) + >>> target = B.tensor([[1, 1, 0], [0, 1, 0], [0, 0, 0], [0, 1, 1]]) + >>> fpr, tpr, thresholds = roc(pred, target, num_classes=3, pos_label=1) + >>> fpr # doctest: +NORMALIZE_WHITESPACE + [tensor([0.0000, 0.3333, 0.3333, 0.6667, 1.0000]), + tensor([0., 0., 0., 1., 1.]), + tensor([0.0000, 0.0000, 0.3333, 0.6667, 1.0000])] + >>> tpr + [tensor([0., 0., 1., 1., 1.]), tensor([0.0000, 0.3333, 0.6667, 0.6667, 1.0000]), tensor([0., 1., 1., 1., 1.])] + >>> thresholds # doctest: +NORMALIZE_WHITESPACE + [tensor([1.8603, 0.8603, 0.8191, 0.3584, 0.2286]), + tensor([1.7576, 0.7576, 0.3680, 0.3468, 0.0745]), + tensor([1.1837, 0.1837, 0.1338, 0.1183, 0.1138])] + """ + preds, target, num_classes, pos_label = _roc_update(preds, target, num_classes, pos_label) + return _roc_compute(preds, target, num_classes, pos_label, sample_weights) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/specificity.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/specificity.py new file mode 100644 index 00000000..be87dce7 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/specificity.py @@ -0,0 +1,215 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
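The ``fpr``/``tpr`` pairs returned by ``roc`` above are what an AUROC computation integrates over. A rough sketch, using a hand-written trapezoidal rule rather than the package's own AUROC metric:

```python
import paddleext.torchapi as B
from paddlemetrics.functional import roc

preds = B.tensor([0.1, 0.6, 0.35, 0.8])
target = B.tensor([0, 1, 0, 1])

fpr, tpr, _ = roc(preds, target, pos_label=1)

# trapezoidal integration of tpr over fpr approximates the area under the curve
auc = ((fpr[1:] - fpr[:-1]) * (tpr[1:] + tpr[:-1]) / 2).sum()
```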
+from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.classification.stat_scores import _reduce_stat_scores, _stat_scores_update +from paddlemetrics.utilities.enums import AverageMethod, MDMCAverageMethod + + +def _specificity_compute( + tp: Tensor, + fp: Tensor, + tn: Tensor, + fn: Tensor, + average: str, + mdmc_average: Optional[str], +) -> Tensor: + """Computes specificity from the stat scores: true positives, false positives, true negatives, false negatives. + + Args: + tp: True positives + fp: False positives + tn: True negatives + fn: False negatives + average: Defines the reduction that is applied + mdmc_average: Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter) + + Example: + >>> from paddlemetrics.functional.classification.stat_scores import _stat_scores_update + >>> preds = B.tensor([2, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> tp, fp, tn, fn = _stat_scores_update(preds, target, reduce='macro', num_classes=3) + >>> _specificity_compute(tp, fp, tn, fn, average='macro', mdmc_average=None) + tensor(0.6111) + >>> tp, fp, tn, fn = _stat_scores_update(preds, target, reduce='micro') + >>> _specificity_compute(tp, fp, tn, fn, average='micro', mdmc_average=None) + tensor(0.6250) + """ + + numerator = tn + denominator = tn + fp + if average == AverageMethod.NONE and mdmc_average != MDMCAverageMethod.SAMPLEWISE: + # a class is not present if there exists no TPs, no FPs, and no FNs + meaningless_indeces = B.nonzero((tp | fn | fp) == 0).cpu() + numerator[meaningless_indeces, ...] = -1 + denominator[meaningless_indeces, ...] = -1 + return _reduce_stat_scores( + numerator=numerator, + denominator=denominator, + weights=None if average != AverageMethod.WEIGHTED else denominator, + average=average, + mdmc_average=mdmc_average, + ) + + +def specificity( + preds: Tensor, + target: Tensor, + average: str = "micro", + mdmc_average: Optional[str] = None, + ignore_index: Optional[int] = None, + num_classes: Optional[int] = None, + threshold: float = 0.5, + top_k: Optional[int] = None, + multiclass: Optional[bool] = None, +) -> Tensor: + r""" + Computes `Specificity`_ + + .. math:: \text{Specificity} = \frac{\text{TN}}{\text{TN} + \text{FP}} + + Where :math:`\text{TN}` and :math:`\text{FP}` represent the number of true negatives and + false positives respecitively. With the use of ``top_k`` parameter, this metric can + generalize to Specificity@K. + + The reduction method (how the specificity scores are aggregated) is controlled by the + ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the + multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + preds: Predictions from model (probabilities, or labels) + target: Ground truth values + average: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes. + - ``'macro'``: Calculate the metric for each class separately, and average the + metrics across classes (with equal weights for each class). + - ``'weighted'``: Calculate the metric for each class separately, and average the + metrics across classes, weighting each class by its support (``tn + fp``). + - ``'none'`` or ``None``: Calculate the metric for each class separately, and return + the metric for every class. 
+ - ``'samples'``: Calculate the metric for each sample, and average the metrics + across samples (with equal weights for each sample). + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_average``. + + .. note:: If ``'none'`` and a given class doesn't occur in the `preds` or `target`, + the value for the class will be ``nan``. + + mdmc_average: + Defines how averaging is done for multi-dimensional multi-class inputs (on top of the + ``average`` parameter). Should be one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class. + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then averaged over samples. + The computation for each sample is done by treating the flattened extra axes ``...`` + (see :ref:`references/modules:input types`) as the ``N`` dimension within the sample, + and computing the metric for the sample based on that. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs + (see :ref:`references/modules:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. + + ignore_index: + Integer specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` + or ``'none'``, the score for the ignored class will be returned as ``nan``. + + num_classes: + Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. + + threshold: + Threshold probability value for transforming probability predictions to binary + (0,1) predictions, in the case of binary or multi-label inputs + top_k: + Number of highest probability entries for each sample to convert to 1s - relevant + only for inputs with probability predictions. If this parameter is set for multi-label + inputs, it will take precedence over ``threshold``. For (multi-dim) multi-class inputs, + this parameter defaults to 1. + + Should be left unset (``None``) for inputs with label predictions. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + Return: + The shape of the returned tensor depends on the ``average`` parameter + + - If ``average in ['micro', 'macro', 'weighted', 'samples']``, a one-element tensor will be returned + - If ``average in ['none', None]``, the shape will be ``(C,)``, where ``C`` stands for the number + of classes + + Raises: + ValueError: + If ``average`` is not one of ``"micro"``, ``"macro"``, ``"weighted"``, + ``"samples"``, ``"none"`` or ``None``. + ValueError: + If ``mdmc_average`` is not one of ``None``, ``"samplewise"``, ``"global"``. + ValueError: + If ``average`` is set but ``num_classes`` is not provided. + ValueError: + If ``num_classes`` is set + and ``ignore_index`` is not in the range ``[0, num_classes)``. 
+
+    Example:
+        >>> from paddlemetrics.functional import specificity
+        >>> preds = B.tensor([2, 0, 2, 1])
+        >>> target = B.tensor([1, 1, 2, 0])
+        >>> specificity(preds, target, average='macro', num_classes=3)
+        tensor(0.6111)
+        >>> specificity(preds, target, average='micro')
+        tensor(0.6250)
+
+    """
+
+    allowed_average = ["micro", "macro", "weighted", "samples", "none", None]
+    if average not in allowed_average:
+        raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.")
+
+    allowed_mdmc_average = [None, "samplewise", "global"]
+    if mdmc_average not in allowed_mdmc_average:
+        raise ValueError(f"The `mdmc_average` has to be one of {allowed_mdmc_average}, got {mdmc_average}.")
+
+    if average in ["macro", "weighted", "none", None] and (not num_classes or num_classes < 1):
+        raise ValueError(f"When you set `average` as {average}, you have to provide the number of classes.")
+
+    if num_classes and ignore_index is not None and (not 0 <= ignore_index < num_classes or num_classes == 1):
+        raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {num_classes} classes")
+
+    reduce = "macro" if average in ["weighted", "none", None] else average
+    tp, fp, tn, fn = _stat_scores_update(
+        preds,
+        target,
+        reduce=reduce,
+        mdmc_reduce=mdmc_average,
+        threshold=threshold,
+        num_classes=num_classes,
+        top_k=top_k,
+        multiclass=multiclass,
+        ignore_index=ignore_index,
+    )
+
+    return _specificity_compute(tp, fp, tn, fn, average, mdmc_average)
diff --git a/RE/paddlemetric/src/paddlemetrics/functional/classification/stat_scores.py b/RE/paddlemetric/src/paddlemetrics/functional/classification/stat_scores.py
new file mode 100644
index 00000000..33e1cafd
--- /dev/null
+++ b/RE/paddlemetric/src/paddlemetrics/functional/classification/stat_scores.py
@@ -0,0 +1,396 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import List, Optional, Tuple, Union
+
+import paddleext.torchapi as B
+from paddleext.torchapi import Tensor, tensor
+
+from paddlemetrics.utilities.checks import _input_format_classification
+from paddlemetrics.utilities.enums import AverageMethod, MDMCAverageMethod
+
+
+def _del_column(data: Tensor, idx: int) -> Tensor:
+    """Delete the column at index."""
+    return B.cat([data[:, :idx], data[:, (idx + 1) :]], 1)
+
+
+def _stat_scores(
+    preds: Tensor,
+    target: Tensor,
+    reduce: Optional[str] = "micro",
+) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
+    """Calculate the number of tp, fp, tn, fn.
+
+    Args:
+        preds:
+            An ``(N, C)`` or ``(N, C, X)`` tensor of predictions (0 or 1)
+        target:
+            An ``(N, C)`` or ``(N, C, X)`` tensor of true labels (0 or 1)
+        reduce:
+            One of ``'micro'``, ``'macro'``, ``'samples'``
+
+    Return:
+        Returns a list of 4 tensors; tp, fp, tn, fn.
+ The shape of the returned tensors depnds on the shape of the inputs + and the ``reduce`` parameter: + + If inputs are of the shape ``(N, C)``, then + - If ``reduce='micro'``, the returned tensors are 1 element tensors + - If ``reduce='macro'``, the returned tensors are ``(C,)`` tensors + - If ``reduce'samples'``, the returned tensors are ``(N,)`` tensors + + If inputs are of the shape ``(N, C, X)``, then + - If ``reduce='micro'``, the returned tensors are ``(N,)`` tensors + - If ``reduce='macro'``, the returned tensors are ``(N,C)`` tensors + - If ``reduce='samples'``, the returned tensors are ``(N,X)`` tensors + """ + dim: Union[int, List[int]] = 1 # for "samples" + if reduce == "micro": + dim = [0, 1] if preds.ndim == 2 else [1, 2] + elif reduce == "macro": + dim = 0 if preds.ndim == 2 else 2 + + true_pred, false_pred = target == preds, target != preds + pos_pred, neg_pred = preds == 1, preds == 0 + + tp = (true_pred * pos_pred).sum(dim=dim) + fp = (false_pred * pos_pred).sum(dim=dim) + + tn = (true_pred * neg_pred).sum(dim=dim) + fn = (false_pred * neg_pred).sum(dim=dim) + return tp.long(), fp.long(), tn.long(), fn.long() + + +def _stat_scores_update( + preds: Tensor, + target: Tensor, + reduce: Optional[str] = "micro", + mdmc_reduce: Optional[str] = None, + num_classes: Optional[int] = None, + top_k: Optional[int] = None, + threshold: float = 0.5, + multiclass: Optional[bool] = None, + ignore_index: Optional[int] = None, +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + """Updates and returns the the number of true positives, false positives, true negatives, false negatives. + Raises ValueError if: + + - The `ignore_index` is not valid + - When `ignore_index` is used with binary data + - When inputs are multi-dimensional multi-class, and the `mdmc_reduce` parameter is not set + + Args: + preds: Predicted tensor + target: Ground truth tensor + reduce: Defines the reduction that is applied + mdmc_reduce: Defines how the multi-dimensional multi-class inputs are handeled + num_classes: Number of classes. Necessary for (multi-dimensional) multi-class or multi-label data. + top_k: Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs + threshold: Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities + multiclass: Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be + ignore_index: Specify a class (label) to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and + ``reduce='macro'``, the class statistics for the ignored class will all be returned + as ``-1``. 
+ """ + + preds, target, _ = _input_format_classification( + preds, target, threshold=threshold, num_classes=num_classes, multiclass=multiclass, top_k=top_k + ) + + if ignore_index is not None and not 0 <= ignore_index < preds.shape[1]: + raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {preds.shape[0]} classes") + + if ignore_index is not None and preds.shape[1] == 1: + raise ValueError("You can not use `ignore_index` with binary data.") + + if preds.ndim == 3: + if not mdmc_reduce: + raise ValueError( + "When your inputs are multi-dimensional multi-class, you have to set the `mdmc_reduce` parameter" + ) + if mdmc_reduce == "global": + preds = B.transpose(preds, 1, 2).reshape(-1, preds.shape[1]) + target = B.transpose(target, 1, 2).reshape(-1, target.shape[1]) + + # Delete what is in ignore_index, if applicable (and classes don't matter): + if ignore_index is not None and reduce != "macro": + preds = _del_column(preds, ignore_index) + target = _del_column(target, ignore_index) + + tp, fp, tn, fn = _stat_scores(preds, target, reduce=reduce) + + # Take care of ignore_index + if ignore_index is not None and reduce == "macro": + tp[..., ignore_index] = -1 + fp[..., ignore_index] = -1 + tn[..., ignore_index] = -1 + fn[..., ignore_index] = -1 + + return tp, fp, tn, fn + + +def _stat_scores_compute(tp: Tensor, fp: Tensor, tn: Tensor, fn: Tensor) -> Tensor: + """Computes the number of true positives, false positives, true negatives, false negatives. Concatenates the + input tensors along with the support into one output. + + Args: + tp: True positives + fp: False positives + tn: True negatives + fn: False negatives + + Example: + >>> preds = B.tensor([1, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> tp, fp, tn, fn = _stat_scores_update(preds, target, reduce='macro', num_classes=3) + >>> _stat_scores_compute(tp, fp, tn, fn) + tensor([[0, 1, 2, 1, 1], + [1, 1, 1, 1, 2], + [1, 0, 3, 0, 1]]) + >>> tp, fp, tn, fn = _stat_scores_update(preds, target, reduce='micro') + >>> _stat_scores_compute(tp, fp, tn, fn) + tensor([2, 2, 6, 2, 4]) + """ + stats = [ + tp.unsqueeze(-1), + fp.unsqueeze(-1), + tn.unsqueeze(-1), + fn.unsqueeze(-1), + tp.unsqueeze(-1) + fn.unsqueeze(-1), # support + ] + outputs: Tensor = B.cat(stats, -1) + outputs = B.where(outputs < 0, tensor(-1, device=outputs.device, dtype=outputs.dtype), outputs) + + return outputs + + +def _reduce_stat_scores( + numerator: Tensor, + denominator: Tensor, + weights: Optional[Tensor], + average: Optional[str], + mdmc_average: Optional[str], + zero_division: int = 0, +) -> Tensor: + """Reduces scores of type ``numerator/denominator`` or. + + ``weights * (numerator/denominator)``, if ``average='weighted'``. + + Args: + numerator: A tensor with numerator numbers. + denominator: A tensor with denominator numbers. If a denominator is + negative, the class will be ignored (if averaging), or its score + will be returned as ``nan`` (if ``average=None``). + If the denominator is zero, then ``zero_division`` score will be + used for those elements. + weights: A tensor of weights to be used if ``average='weighted'``. + average: The method to average the scores + mdmc_average: The method to average the scores if inputs were multi-dimensional multi-class (MDMC) + zero_division: The value to use for the score if denominator equals zero. 
+ """ + numerator, denominator = numerator.float(), denominator.float() + zero_div_mask = denominator == 0 + ignore_mask = denominator < 0 + + if weights is None: + weights = B.ones_like(denominator) + else: + weights = weights.float() + + numerator = B.where(zero_div_mask, tensor(float(zero_division), device=numerator.device), numerator) + denominator = B.where(zero_div_mask | ignore_mask, tensor(1.0, device=denominator.device), denominator) + weights = B.where(ignore_mask, tensor(0.0, device=weights.device), weights) + + if average not in (AverageMethod.MICRO, AverageMethod.NONE, None): + weights = weights / weights.sum(dim=-1, keepdim=True) + + scores = weights * (numerator / denominator) + + # This is in case where sum(weights) = 0, which happens if we ignore the only present class with average='weighted' + scores = B.where(B.isnan(scores), tensor(float(zero_division), device=scores.device), scores) + + if mdmc_average == MDMCAverageMethod.SAMPLEWISE: + scores = scores.mean(dim=0) + ignore_mask = ignore_mask.sum(dim=0).bool() + + if average in (AverageMethod.NONE, None): + scores = B.where(ignore_mask, tensor(float("nan"), device=scores.device), scores) + else: + scores = scores.sum() + + return scores + + +def stat_scores( + preds: Tensor, + target: Tensor, + reduce: str = "micro", + mdmc_reduce: Optional[str] = None, + num_classes: Optional[int] = None, + top_k: Optional[int] = None, + threshold: float = 0.5, + multiclass: Optional[bool] = None, + ignore_index: Optional[int] = None, +) -> Tensor: + r"""Computes the number of true positives, false positives, true negatives, false negatives. + Related to `Type I and Type II errors`_ + and the `confusion matrix`_. + + The reduction method (how the statistics are aggregated) is controlled by the + ``reduce`` parameter, and additionally by the ``mdmc_reduce`` parameter in the + multi-dimensional multi-class case. Accepts all inputs listed in :ref:`references/modules:input types`. + + Args: + preds: Predictions from model (probabilities, logits or labels) + target: Ground truth values + threshold: + Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case + of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities. + + top_k: + Number of highest probability or logit score predictions considered to find the correct label, + relevant only for (multi-dimensional) multi-class inputs. The + default value (``None``) will be interpreted as 1 for these inputs. + + Should be left at default (``None``) for all other types of inputs. + + reduce: + Defines the reduction that is applied. Should be one of the following: + + - ``'micro'`` [default]: Counts the statistics by summing over all [sample, class] + combinations (globally). Each statistic is represented by a single integer. + - ``'macro'``: Counts the statistics for each class separately (over all samples). + Each statistic is represented by a ``(C,)`` tensor. Requires ``num_classes`` + to be set. + - ``'samples'``: Counts the statistics for each sample separately (over all classes). + Each statistic is represented by a ``(N, )`` 1d tensor. + + .. note:: What is considered a sample in the multi-dimensional multi-class case + depends on the value of ``mdmc_reduce``. + + num_classes: + Number of classes. Necessary for (multi-dimensional) multi-class or multi-label data. + + ignore_index: + Specify a class (label) to ignore. 
If given, this class index does not contribute + to the returned score, regardless of reduction method. If an index is ignored, and + ``reduce='macro'``, the class statistics for the ignored class will all be returned + as ``-1``. + + mdmc_reduce: + Defines how the multi-dimensional multi-class inputs are handeled. Should be + one of the following: + + - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional + multi-class (see :ref:`references/modules:input types` for the definition of input types). + + - ``'samplewise'``: In this case, the statistics are computed separately for each + sample on the ``N`` axis, and then the outputs are concatenated together. In each + sample the extra axes ``...`` are flattened to become the sub-sample axis, and + statistics for each sample are computed by treating the sub-sample axis as the + ``N`` axis for that sample. + + - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs are + flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they + were ``(N_X, C)``. From here on the ``reduce`` parameter applies as usual. + + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + Return: + The metric returns a tensor of shape ``(..., 5)``, where the last dimension corresponds + to ``[tp, fp, tn, fn, sup]`` (``sup`` stands for support and equals ``tp + fn``). The + shape depends on the ``reduce`` and ``mdmc_reduce`` (in case of multi-dimensional + multi-class data) parameters: + + - If the data is not multi-dimensional multi-class, then + + - If ``reduce='micro'``, the shape will be ``(5, )`` + - If ``reduce='macro'``, the shape will be ``(C, 5)``, + where ``C`` stands for the number of classes + - If ``reduce='samples'``, the shape will be ``(N, 5)``, where ``N`` stands for + the number of samples + + - If the data is multi-dimensional multi-class and ``mdmc_reduce='global'``, then + + - If ``reduce='micro'``, the shape will be ``(5, )`` + - If ``reduce='macro'``, the shape will be ``(C, 5)`` + - If ``reduce='samples'``, the shape will be ``(N*X, 5)``, where ``X`` stands for + the product of sizes of all "extra" dimensions of the data (i.e. all dimensions + except for ``C`` and ``N``) + + - If the data is multi-dimensional multi-class and ``mdmc_reduce='samplewise'``, then + + - If ``reduce='micro'``, the shape will be ``(N, 5)`` + - If ``reduce='macro'``, the shape will be ``(N, C, 5)`` + - If ``reduce='samples'``, the shape will be ``(N, X, 5)`` + + Raises: + ValueError: + If ``reduce`` is none of ``"micro"``, ``"macro"`` or ``"samples"``. + ValueError: + If ``mdmc_reduce`` is none of ``None``, ``"samplewise"``, ``"global"``. + ValueError: + If ``reduce`` is set to ``"macro"`` and ``num_classes`` is not provided. + ValueError: + If ``num_classes`` is set + and ``ignore_index`` is not in the range ``[0, num_classes)``. + ValueError: + If ``ignore_index`` is used with ``binary data``. + ValueError: + If inputs are ``multi-dimensional multi-class`` and ``mdmc_reduce`` is not provided. 
+ + Example: + >>> from paddlemetrics.functional import stat_scores + >>> preds = B.tensor([1, 0, 2, 1]) + >>> target = B.tensor([1, 1, 2, 0]) + >>> stat_scores(preds, target, reduce='macro', num_classes=3) + tensor([[0, 1, 2, 1, 1], + [1, 1, 1, 1, 2], + [1, 0, 3, 0, 1]]) + >>> stat_scores(preds, target, reduce='micro') + tensor([2, 2, 6, 2, 4]) + + """ + if reduce not in ["micro", "macro", "samples"]: + raise ValueError(f"The `reduce` {reduce} is not valid.") + + if mdmc_reduce not in [None, "samplewise", "global"]: + raise ValueError(f"The `mdmc_reduce` {mdmc_reduce} is not valid.") + + if reduce == "macro" and (not num_classes or num_classes < 1): + raise ValueError("When you set `reduce` as 'macro', you have to provide the number of classes.") + + if num_classes and ignore_index is not None and (not 0 <= ignore_index < num_classes or num_classes == 1): + raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {num_classes} classes") + + tp, fp, tn, fn = _stat_scores_update( + preds, + target, + reduce=reduce, + mdmc_reduce=mdmc_reduce, + top_k=top_k, + threshold=threshold, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + ) + return _stat_scores_compute(tp, fp, tn, fn) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/image/__init__.py b/RE/paddlemetric/src/paddlemetrics/functional/image/__init__.py new file mode 100644 index 00000000..9fe64120 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/image/__init__.py @@ -0,0 +1,16 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddlemetrics.functional.image.gradients import image_gradients # noqa: F401 +from paddlemetrics.functional.image.psnr import psnr # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/functional/image/gradients.py b/RE/paddlemetric/src/paddlemetrics/functional/image/gradients.py new file mode 100644 index 00000000..abe1b08d --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/image/gradients.py @@ -0,0 +1,81 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
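The ``ignore_index`` behaviour described for ``stat_scores`` above (under ``reduce='macro'`` the statistics of the ignored class are reported as ``-1``) can be checked directly. A small sketch, reusing the toy data from the docstring example:

```python
import paddleext.torchapi as B
from paddlemetrics.functional import stat_scores

preds = B.tensor([1, 0, 2, 1])
target = B.tensor([1, 1, 2, 0])

# each row is [tp, fp, tn, fn, support]; the row for the ignored class 0 is all -1
print(stat_scores(preds, target, reduce="macro", num_classes=3, ignore_index=0))
```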
+from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + + +def _image_gradients_validate(img: Tensor) -> None: + """Validates whether img is a 4D torch Tensor.""" + + if not isinstance(img, Tensor): + raise TypeError(f"The `img` expects a value of type but got {type(img)}") + if img.ndim != 4: + raise RuntimeError(f"The `img` expects a 4D tensor but got {img.ndim}D tensor") + + +def _compute_image_gradients(img: Tensor) -> Tuple[Tensor, Tensor]: + """Computes image gradients (dy/dx) for a given image.""" + + batch_size, channels, height, width = img.shape + + dy = img[..., 1:, :] - img[..., :-1, :] + dx = img[..., :, 1:] - img[..., :, :-1] + + shapey = [batch_size, channels, 1, width] + dy = B.cat([dy, B.zeros(shapey, device=img.device, dtype=img.dtype)], dim=2) + dy = dy.view(img.shape) + + shapex = [batch_size, channels, height, 1] + dx = B.cat([dx, B.zeros(shapex, device=img.device, dtype=img.dtype)], dim=3) + dx = dx.view(img.shape) + + return dy, dx + + +def image_gradients(img: Tensor) -> Tuple[Tensor, Tensor]: + """Computes `Gradient Computation of Image`_ of a given image using finite difference. + + Args: + img: An ``(N, C, H, W)`` input tensor where C is the number of image channels + + Return: + Tuple of (dy, dx) with each gradient of shape ``[N, C, H, W]`` + + Raises: + TypeError: + If ``img`` is not of the type . + RuntimeError: + If ``img`` is not a 4D tensor. + + Example: + >>> from paddlemetrics.functional import image_gradients + >>> image = B.arange(0, 1*1*5*5, dtype=B.float32) + >>> image = B.reshape(image, (1, 1, 5, 5)) + >>> dy, dx = image_gradients(image) + >>> dy[0, 0, :, :] + tensor([[5., 5., 5., 5., 5.], + [5., 5., 5., 5., 5.], + [5., 5., 5., 5., 5.], + [5., 5., 5., 5., 5.], + [0., 0., 0., 0., 0.]]) + + .. note:: The implementation follows the 1-step finite difference method as followed + by the TF implementation. The values are organized such that the gradient of + [I(x+1, y)-[I(x, y)]] are at the (x, y) location + """ + _image_gradients_validate(img) + + return _compute_image_gradients(img) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/image/psnr.py b/RE/paddlemetric/src/paddlemetrics/functional/image/psnr.py new file mode 100644 index 00000000..2ffd6046 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/image/psnr.py @@ -0,0 +1,150 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities import rank_zero_warn, reduce + + +def _psnr_compute( + sum_squared_error: Tensor, + n_obs: Tensor, + data_range: Tensor, + base: float = 10.0, + reduction: str = "elementwise_mean", +) -> Tensor: + """Computes peak signal-to-noise ratio. + + Args: + sum_squared_error: Sum of square of errors over all observations + n_obs: Number of predictions or observations + data_range: + the range of the data. 
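The gradient convention used by ``image_gradients`` above (forward differences, zero-padded on the last row/column) also determines what ``dx`` looks like. A complementary sketch for the same 5x5 ramp image as in the docstring example (the printed values follow from the definition, not from running this port):

```python
import paddleext.torchapi as B
from paddlemetrics.functional import image_gradients

image = B.reshape(B.arange(0, 1 * 1 * 5 * 5, dtype=B.float32), (1, 1, 5, 5))
dy, dx = image_gradients(image)

# each row increases by 1 from left to right, so dx is 1 everywhere except the last column
print(dx[0, 0, :, :])
```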
If None, it is determined from the data (max - min). ``data_range`` must be given + when ``dim`` is not None. + base: a base of a logarithm to use (default: 10) + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + + Example: + >>> preds = B.tensor([[0.0, 1.0], [2.0, 3.0]]) + >>> target = B.tensor([[3.0, 2.0], [1.0, 0.0]]) + >>> data_range = target.max() - target.min() + >>> sum_squared_error, n_obs = _psnr_update(preds, target) + >>> _psnr_compute(sum_squared_error, n_obs, data_range) + tensor(2.5527) + """ + + psnr_base_e = 2 * B.log(data_range) - B.log(sum_squared_error / n_obs) + psnr_vals = psnr_base_e * (10 / B.log(tensor(base))) + return reduce(psnr_vals, reduction=reduction) + + +def _psnr_update( + preds: Tensor, + target: Tensor, + dim: Optional[Union[int, Tuple[int, ...]]] = None, +) -> Tuple[Tensor, Tensor]: + """Updates and returns variables required to compute peak signal-to-noise ratio. + + Args: + preds: Predicted tensor + target: Ground truth tensor + dim: + Dimensions to reduce PSNR scores over provided as either an integer or a list of integers. Default is + None meaning scores will be reduced across all dimensions. + """ + + if dim is None: + sum_squared_error = B.sum(B.pow(preds - target, 2)) + n_obs = tensor(target.numel(), device=target.device) + return sum_squared_error, n_obs + + diff = preds - target + sum_squared_error = B.sum(diff * diff, dim=dim) + + if isinstance(dim, int): + dim_list = [dim] + else: + dim_list = list(dim) + if not dim_list: + n_obs = tensor(target.numel(), device=target.device) + else: + n_obs = tensor(target.size(), device=target.device)[dim_list].prod() + n_obs = n_obs.expand_as(sum_squared_error) + + return sum_squared_error, n_obs + + +def psnr( + preds: Tensor, + target: Tensor, + data_range: Optional[float] = None, + base: float = 10.0, + reduction: str = "elementwise_mean", + dim: Optional[Union[int, Tuple[int, ...]]] = None, +) -> Tensor: + """Computes the peak signal-to-noise ratio. + + Args: + preds: estimated signal + target: groun truth signal + data_range: + the range of the data. If None, it is determined from the data (max - min). ``data_range`` must be given + when ``dim`` is not None. + base: a base of a logarithm to use (default: 10) + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + + dim: + Dimensions to reduce PSNR scores over provided as either an integer or a list of integers. Default is + None meaning scores will be reduced across all dimensions. + Return: + Tensor with PSNR score + + Raises: + ValueError: + If ``dim`` is not ``None`` and ``data_range`` is not provided. + + Example: + >>> from paddlemetrics.functional import psnr + >>> pred = B.tensor([[0.0, 1.0], [2.0, 3.0]]) + >>> target = B.tensor([[3.0, 2.0], [1.0, 0.0]]) + >>> psnr(pred, target) + tensor(2.5527) + + .. note:: + Half precision is only support on GPU for this metric + """ + if dim is None and reduction != "elementwise_mean": + rank_zero_warn(f"The `reduction={reduction}` will not have any effect when `dim` is None.") + + if data_range is None: + if dim is not None: + # Maybe we could use `B.amax(target, dim=dim) - B.amin(target, dim=dim)` in PyTorch 1.7 to calculate + # `data_range` in the future. 
+ raise ValueError("The `data_range` must be given when `dim` is not None.") + + data_range = target.max() - target.min() + else: + data_range = tensor(float(data_range)) + sum_squared_error, n_obs = _psnr_update(preds, target, dim=dim) + return _psnr_compute(sum_squared_error, n_obs, data_range, base=base, reduction=reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/image/ssim.py b/RE/paddlemetric/src/paddlemetrics/functional/image/ssim.py new file mode 100644 index 00000000..52af9b79 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/image/ssim.py @@ -0,0 +1,225 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional, Sequence, Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape +from paddlemetrics.utilities.distributed import reduce + + +def _gaussian(kernel_size: int, sigma: float, dtype: B.dtype, device: B.device) -> Tensor: + """Computes 1D gaussian kernel. + + Args: + kernel_size: size of the gaussian kernel + sigma: Standard deviation of the gaussian kernel + dtype: data type of the output tensor + device: device of the output tensor + + Example: + >>> _gaussian(3, 1, B.float, 'cpu') + tensor([[0.2741, 0.4519, 0.2741]]) + """ + dist = B.arange(start=(1 - kernel_size) / 2, end=(1 + kernel_size) / 2, step=1, dtype=dtype, device=device) + gauss = B.exp(-B.pow(dist / sigma, 2) / 2) + return (gauss / gauss.sum()).unsqueeze(dim=0) # (1, kernel_size) + + +def _gaussian_kernel( + channel: int, kernel_size: Sequence[int], sigma: Sequence[float], dtype: B.dtype, device: B.device +) -> Tensor: + """Computes 2D gaussian kernel. + + Args: + channel: number of channels in the image + kernel_size: size of the gaussian kernel as a tuple (h, w) + sigma: Standard deviation of the gaussian kernel + dtype: data type of the output tensor + device: device of the output tensor + + Example: + >>> _gaussian_kernel(1, (5,5), (1,1), B.float, "cpu") + tensor([[[[0.0030, 0.0133, 0.0219, 0.0133, 0.0030], + [0.0133, 0.0596, 0.0983, 0.0596, 0.0133], + [0.0219, 0.0983, 0.1621, 0.0983, 0.0219], + [0.0133, 0.0596, 0.0983, 0.0596, 0.0133], + [0.0030, 0.0133, 0.0219, 0.0133, 0.0030]]]]) + """ + + gaussian_kernel_x = _gaussian(kernel_size[0], sigma[0], dtype, device) + gaussian_kernel_y = _gaussian(kernel_size[1], sigma[1], dtype, device) + kernel = B.matmul(gaussian_kernel_x.t(), gaussian_kernel_y) # (kernel_size, 1) * (1, kernel_size) + + return kernel.expand(channel, 1, kernel_size[0], kernel_size[1]) + + +def _ssim_update(preds: Tensor, target: Tensor) -> Tuple[Tensor, Tensor]: + """Updates and returns variables required to compute Structural Similarity Index Measure. Checks for same shape + and type of the input tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + """ + + if preds.dtype != target.dtype: + raise TypeError( + "Expected `preds` and `target` to have the same data type." 
+ f" Got preds: {preds.dtype} and target: {target.dtype}." + ) + _check_same_shape(preds, target) + if len(preds.shape) != 4: + raise ValueError( + "Expected `preds` and `target` to have BxCxHxW shape." + f" Got preds: {preds.shape} and target: {target.shape}." + ) + return preds, target + + +def _ssim_compute( + preds: Tensor, + target: Tensor, + kernel_size: Sequence[int] = (11, 11), + sigma: Sequence[float] = (1.5, 1.5), + reduction: str = "elementwise_mean", + data_range: Optional[float] = None, + k1: float = 0.01, + k2: float = 0.03, +) -> Tensor: + """Computes Structual Similarity Index Measure. + + Args: + preds: estimated image + target: ground truth image + kernel_size: size of the gaussian kernel (default: (11, 11)) + sigma: Standard deviation of the gaussian kernel (default: (1.5, 1.5)) + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + + data_range: Range of the image. If ``None``, it is determined from the image (max - min) + k1: Parameter of SSIM. Default: 0.01 + k2: Parameter of SSIM. Default: 0.03 + + Example: + >>> preds = B.rand([16, 1, 16, 16]) + >>> target = preds * 0.75 + >>> preds, target = _ssim_update(preds, target) + >>> _ssim_compute(preds, target) + tensor(0.9219) + """ + if len(kernel_size) != 2 or len(sigma) != 2: + raise ValueError( + "Expected `kernel_size` and `sigma` to have the length of two." + f" Got kernel_size: {len(kernel_size)} and sigma: {len(sigma)}." + ) + + if any(x % 2 == 0 or x <= 0 for x in kernel_size): + raise ValueError(f"Expected `kernel_size` to have odd positive number. Got {kernel_size}.") + + if any(y <= 0 for y in sigma): + raise ValueError(f"Expected `sigma` to have positive number. Got {sigma}.") + + if data_range is None: + data_range = max(preds.max() - preds.min(), target.max() - target.min()) + + c1 = pow(k1 * data_range, 2) + c2 = pow(k2 * data_range, 2) + device = preds.device + + channel = preds.size(1) + dtype = preds.dtype + kernel = _gaussian_kernel(channel, kernel_size, sigma, dtype, device) + pad_h = (kernel_size[0] - 1) // 2 + pad_w = (kernel_size[1] - 1) // 2 + + preds = B.pad(preds, (pad_h, pad_h, pad_w, pad_w), mode="reflect") + target = B.pad(target, (pad_h, pad_h, pad_w, pad_w), mode="reflect") + + input_list = B.cat((preds, target, preds * preds, target * target, preds * target)) # (5 * B, C, H, W) + outputs = B.conv2d(input_list, kernel, groups=channel) + output_list = outputs.split(preds.shape[0]) + + mu_pred_sq = output_list[0].pow(2) + mu_target_sq = output_list[1].pow(2) + mu_pred_target = output_list[0] * output_list[1] + + sigma_pred_sq = output_list[2] - mu_pred_sq + sigma_target_sq = output_list[3] - mu_target_sq + sigma_pred_target = output_list[4] - mu_pred_target + + upper = 2 * sigma_pred_target + c2 + lower = sigma_pred_sq + sigma_target_sq + c2 + + ssim_idx = ((2 * mu_pred_target + c1) * upper) / ((mu_pred_sq + mu_target_sq + c1) * lower) + ssim_idx = ssim_idx[..., pad_h:-pad_h, pad_w:-pad_w] + + return reduce(ssim_idx, reduction) + + +def ssim( + preds: Tensor, + target: Tensor, + kernel_size: Sequence[int] = (11, 11), + sigma: Sequence[float] = (1.5, 1.5), + reduction: str = "elementwise_mean", + data_range: Optional[float] = None, + k1: float = 0.01, + k2: float = 0.03, +) -> Tensor: + """Computes Structual Similarity Index Measure. 
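+
+    The score is computed over Gaussian-weighted local windows as
+    ``((2 * mu_x * mu_y + c1) * (2 * sigma_xy + c2)) / ((mu_x**2 + mu_y**2 + c1) * (sigma_x**2 + sigma_y**2 + c2))``,
+    with ``c1 = (k1 * data_range) ** 2`` and ``c2 = (k2 * data_range) ** 2``, and the result is
+    then reduced according to ``reduction``.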
+ + Args: + preds: estimated image + target: ground truth image + kernel_size: size of the gaussian kernel (default: (11, 11)) + sigma: Standard deviation of the gaussian kernel (default: (1.5, 1.5)) + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + + data_range: Range of the image. If ``None``, it is determined from the image (max - min) + k1: Parameter of SSIM. Default: 0.01 + k2: Parameter of SSIM. Default: 0.03 + + Return: + Tensor with SSIM score + + Raises: + TypeError: + If ``preds`` and ``target`` don't have the same data type. + ValueError: + If ``preds`` and ``target`` don't have ``BxCxHxW shape``. + ValueError: + If the length of ``kernel_size`` or ``sigma`` is not ``2``. + ValueError: + If one of the elements of ``kernel_size`` is not an ``odd positive number``. + ValueError: + If one of the elements of ``sigma`` is not a ``positive number``. + + Example: + >>> from paddlemetrics.functional import ssim + >>> preds = B.rand([16, 1, 16, 16]) + >>> target = preds * 0.75 + >>> ssim(preds, target) + tensor(0.9219) + """ + preds, target = _ssim_update(preds, target) + return _ssim_compute(preds, target, kernel_size, sigma, reduction, data_range, k1, k2) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/pairwise/__init__.py b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/__init__.py new file mode 100644 index 00000000..1d28d0c4 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/__init__.py @@ -0,0 +1,17 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddlemetrics.functional.pairwise.cosine import pairwise_cosine_similarity # noqa: F401 +from paddlemetrics.functional.pairwise.euclidean import pairwise_euclidean_distance # noqa: F401 +from paddlemetrics.functional.pairwise.linear import pairwise_linear_similarity # noqa: F401 +from paddlemetrics.functional.pairwise.manhatten import pairwise_manhatten_distance # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/functional/pairwise/cosine.py b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/cosine.py new file mode 100644 index 00000000..cdd24e15 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/cosine.py @@ -0,0 +1,85 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
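+
+# The pairwise cosine similarity below is the dot product of row-normalised inputs,
+# i.e. sim(x_i, y_j) = <x_i, y_j> / (||x_i|| * ||y_j||). For example, with x_i = [2, 3]
+# and y_j = [1, 0]: 2 / (sqrt(13) * 1) ≈ 0.5547, matching the doctest further down.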
+from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.pairwise.helpers import _check_input, _reduce_distance_matrix + + +def _pairwise_cosine_similarity_update( + x: Tensor, y: Optional[Tensor] = None, zero_diagonal: Optional[bool] = None +) -> Tensor: + """Calculates the pairwise cosine similarity matrix. + + Args: + x: tensor of shape ``[N,d]`` + y: tensor of shape ``[M,d]`` + zero_diagonal: determines if the diagonal of the distance matrix should be set to zero + """ + x, y, zero_diagonal = _check_input(x, y, zero_diagonal) + + norm = B.norm(x, p=2, dim=1) + x /= norm.unsqueeze(1) + norm = B.norm(y, p=2, dim=1) + y /= norm.unsqueeze(1) + + distance = x @ y.T + if zero_diagonal: + distance.fill_diagonal_(0) + return distance + + +def pairwise_cosine_similarity( + x: Tensor, y: Optional[Tensor] = None, reduction: Optional[str] = None, zero_diagonal: Optional[bool] = None +) -> Tensor: + r""" + Calculates pairwise cosine similarity: + + .. math:: + s_{cos}(x,y) = \frac{}{||x|| \cdot ||y||} + = \frac{\sum_{d=1}^D x_d \cdot y_d }{\sqrt{\sum_{d=1}^D x_i^2} \cdot \sqrt{\sum_{d=1}^D x_i^2}} + + If both `x` and `y` are passed in, the calculation will be performed pairwise between the rows of `x` and `y`. + If only `x` is passed in, the calculation will be performed between the rows of `x`. + + Args: + x: Tensor with shape ``[N, d]`` + y: Tensor with shape ``[M, d]``, optional + reduction: reduction to apply along the last dimension. Choose between `'mean'`, `'sum'` + (applied along column dimension) or `'none'`, `None` for no reduction + zero_diagonal: if the diagonal of the distance matrix should be set to 0. If only `x` is given + this defaults to `True` else if `y` is also given it defaults to `False` + + Returns: + A ``[N,N]`` matrix of distances if only ``x`` is given, else a ``[N,M]`` matrix + + Example: + >>> import torchapi as B + >>> from paddlemetrics.functional import pairwise_cosine_similarity + >>> x = B.tensor([[2, 3], [3, 5], [5, 8]], dtype=B.float32) + >>> y = B.tensor([[1, 0], [2, 1]], dtype=B.float32) + >>> pairwise_cosine_similarity(x, y) + tensor([[0.5547, 0.8682], + [0.5145, 0.8437], + [0.5300, 0.8533]]) + >>> pairwise_cosine_similarity(x) + tensor([[0.0000, 0.9989, 0.9996], + [0.9989, 0.0000, 0.9998], + [0.9996, 0.9998, 0.0000]]) + + """ + distance = _pairwise_cosine_similarity_update(x, y, zero_diagonal) + return _reduce_distance_matrix(distance, reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/pairwise/euclidean.py b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/euclidean.py new file mode 100644 index 00000000..fd31cd7f --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/euclidean.py @@ -0,0 +1,79 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
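+
+# The pairwise euclidean distance below uses the expansion
+# ||x_i - y_j||**2 = ||x_i||**2 + ||y_j||**2 - 2 * <x_i, y_j> instead of materialising all
+# pairwise differences. For x_i = [2, 3] and y_j = [1, 0]: 13 + 1 - 4 = 10, and
+# sqrt(10) ≈ 3.1623, matching the doctest further down.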
+from typing import Optional + +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.pairwise.helpers import _check_input, _reduce_distance_matrix + + +def _pairwise_euclidean_distance_update( + x: Tensor, y: Optional[Tensor] = None, zero_diagonal: Optional[bool] = None +) -> Tensor: + """Calculates the pairwise euclidean distance matrix. + + Args: + x: tensor of shape ``[N,d]`` + y: tensor of shape ``[M,d]`` + zero_diagonal: determines if the diagonal of the distance matrix should be set to zero + """ + x, y, zero_diagonal = _check_input(x, y, zero_diagonal) + x_norm = x.norm(dim=1, keepdim=True) + y_norm = y.norm(dim=1).T + distance = x_norm * x_norm + y_norm * y_norm - 2 * x.mm(y.T) + if zero_diagonal: + distance.fill_diagonal_(0) + return distance.sqrt() + + +def pairwise_euclidean_distance( + x: Tensor, y: Optional[Tensor] = None, reduction: Optional[str] = None, zero_diagonal: Optional[bool] = None +) -> Tensor: + r""" + Calculates pairwise euclidean distances: + + .. math:: + d_{euc}(x,y) = ||x - y||_2 = \sqrt{\sum_{d=1}^D (x_d - y_d)^2} + + If both `x` and `y` are passed in, the calculation will be performed pairwise between the rows of `x` and `y`. + If only `x` is passed in, the calculation will be performed between the rows of `x`. + + Args: + x: Tensor with shape ``[N, d]`` + y: Tensor with shape ``[M, d]``, optional + reduction: reduction to apply along the last dimension. Choose between `'mean'`, `'sum'` + (applied along column dimension) or `'none'`, `None` for no reduction + zero_diagonal: if the diagonal of the distance matrix should be set to 0. If only `x` is given + this defaults to `True` else if `y` is also given it defaults to `False` + + Returns: + A ``[N,N]`` matrix of distances if only ``x`` is given, else a ``[N,M]`` matrix + + Example: + >>> import torchapi as B + >>> from paddlemetrics.functional import pairwise_euclidean_distance + >>> x = B.tensor([[2, 3], [3, 5], [5, 8]], dtype=B.float32) + >>> y = B.tensor([[1, 0], [2, 1]], dtype=B.float32) + >>> pairwise_euclidean_distance(x, y) + tensor([[3.1623, 2.0000], + [5.3852, 4.1231], + [8.9443, 7.6158]]) + >>> pairwise_euclidean_distance(x) + tensor([[0.0000, 2.2361, 5.8310], + [2.2361, 0.0000, 3.6056], + [5.8310, 3.6056, 0.0000]]) + + """ + distance = _pairwise_euclidean_distance_update(x, y, zero_diagonal) + return _reduce_distance_matrix(distance, reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/pairwise/helpers.py b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/helpers.py new file mode 100644 index 00000000..2d38916a --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/helpers.py @@ -0,0 +1,59 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
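+
+# Shared input handling for the pairwise metrics: when only ``x`` is given, ``y`` becomes
+# ``x.clone()`` and ``zero_diagonal`` defaults to True (the self-similarity entries are
+# zeroed); when ``y`` is also given, ``zero_diagonal`` defaults to False.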
+from typing import Optional, Tuple + +from paddleext.torchapi import Tensor + + +def _check_input( + x: Tensor, y: Optional[Tensor] = None, zero_diagonal: Optional[bool] = None +) -> Tuple[Tensor, Tensor, bool]: + """Check that input has the right dimensionality and sets the zero_diagonal argument if user has not provided + import module. + + Args: + x: tensor of shape ``[N,d]`` + y: if provided, a tensor of shape ``[M,d]`` + zero_diagonal: determines if the diagonal of the distance matrix should be set to zero + """ + if x.ndim != 2: + raise ValueError(f"Expected argument `x` to be a 2D tensor of shape `[N, d]` but got {x.shape}") + + if y is not None: + if y.ndim != 2 or y.shape[1] != x.shape[1]: + raise ValueError( + "Expected argument `y` to be a 2D tensor of shape `[M, d]` where" + " `d` should be same as the last dimension of `x`" + ) + zero_diagonal = False if zero_diagonal is None else zero_diagonal + else: + y = x.clone() + zero_diagonal = True if zero_diagonal is None else zero_diagonal + return x, y, zero_diagonal + + +def _reduce_distance_matrix(distmat: Tensor, reduction: Optional[str] = None) -> Tensor: + """Final reduction of distance matrix. + + Args: + distance: a ``[N,M]`` matrix + reduction: string determining how to reduce along last dimension + """ + if reduction == "mean": + return distmat.mean(dim=-1) + if reduction == "sum": + return distmat.sum(dim=-1) + if reduction is None or reduction == "none": + return distmat + raise ValueError(f"Expected reduction to be one of `['mean', 'sum', None]` but got {reduction}") diff --git a/RE/paddlemetric/src/paddlemetrics/functional/pairwise/linear.py b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/linear.py new file mode 100644 index 00000000..08e79301 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/linear.py @@ -0,0 +1,78 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.pairwise.helpers import _check_input, _reduce_distance_matrix + + +def _pairwise_linear_similarity_update( + x: Tensor, y: Optional[Tensor] = None, zero_diagonal: Optional[bool] = None +) -> Tensor: + """Calculates the pairwise linear similarity matrix. + + Args: + x: tensor of shape ``[N,d]`` + y: tensor of shape ``[M,d]`` + zero_diagonal: determines if the diagonal of the distance matrix should be set to zero + """ + x, y, zero_diagonal = _check_input(x, y, zero_diagonal) + + distance = x @ y.T + if zero_diagonal: + distance.fill_diagonal_(0) + return distance + + +def pairwise_linear_similarity( + x: Tensor, y: Optional[Tensor] = None, reduction: Optional[str] = None, zero_diagonal: Optional[bool] = None +) -> Tensor: + r""" + Calculates pairwise linear similarity: + + .. math:: + s_{lin}(x,y) = = \sum_{d=1}^D x_d \cdot y_d + + If both `x` and `y` are passed in, the calculation will be performed pairwise between the rows of `x` and `y`. 
+ If only `x` is passed in, the calculation will be performed between the rows of `x`. + + Args: + x: Tensor with shape ``[N, d]`` + y: Tensor with shape ``[M, d]``, optional + reduction: reduction to apply along the last dimension. Choose between `'mean'`, `'sum'` + (applied along column dimension) or `'none'`, `None` for no reduction + zero_diagonal: if the diagonal of the distance matrix should be set to 0. If only `x` is given + this defaults to `True` else if `y` is also given it defaults to `False` + + Returns: + A ``[N,N]`` matrix of distances if only ``x`` is given, else a ``[N,M]`` matrix + + Example: + >>> import torchapi as B + >>> from paddlemetrics.functional import pairwise_linear_similarity + >>> x = B.tensor([[2, 3], [3, 5], [5, 8]], dtype=B.float32) + >>> y = B.tensor([[1, 0], [2, 1]], dtype=B.float32) + >>> pairwise_linear_similarity(x, y) + tensor([[ 2., 7.], + [ 3., 11.], + [ 5., 18.]]) + >>> pairwise_linear_similarity(x) + tensor([[ 0., 21., 34.], + [21., 0., 55.], + [34., 55., 0.]]) + + """ + distance = _pairwise_linear_similarity_update(x, y, zero_diagonal) + return _reduce_distance_matrix(distance, reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/pairwise/manhatten.py b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/manhatten.py new file mode 100644 index 00000000..d0079bd6 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/pairwise/manhatten.py @@ -0,0 +1,78 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.pairwise.helpers import _check_input, _reduce_distance_matrix + + +def _pairwise_manhatten_distance_update( + x: Tensor, y: Optional[Tensor] = None, zero_diagonal: Optional[bool] = None +) -> Tensor: + """Calculates the pairwise manhatten similarity matrix. + + Args: + x: tensor of shape ``[N,d]`` + y: if provided, a tensor of shape ``[M,d]`` + zero_diagonal: determines if the diagonal of the distance matrix should be set to zero + """ + x, y, zero_diagonal = _check_input(x, y, zero_diagonal) + + distance = (x.unsqueeze(1) - y.unsqueeze(0).repeat(x.shape[0], 1, 1)).abs().sum(dim=-1) + if zero_diagonal: + distance.fill_diagonal_(0) + return distance + + +def pairwise_manhatten_distance( + x: Tensor, y: Optional[Tensor] = None, reduction: Optional[str] = None, zero_diagonal: Optional[bool] = None +) -> Tensor: + r""" + Calculates pairwise manhatten distance: + + .. math:: + d_{man}(x,y) = ||x-y||_1 = \sum_{d=1}^D |x_d - y_d| + + If both `x` and `y` are passed in, the calculation will be performed pairwise between the rows of `x` and `y`. + If only `x` is passed in, the calculation will be performed between the rows of `x`. + + Args: + x: Tensor with shape ``[N, d]`` + y: Tensor with shape ``[M, d]``, optional + reduction: reduction to apply along the last dimension. 
Choose between `'mean'`, `'sum'` + (applied along column dimension) or `'none'`, `None` for no reduction + zero_diagonal: if the diagonal of the distance matrix should be set to 0. If only `x` is given + this defaults to `True` else if `y` is also given it defaults to `False` + + Returns: + A ``[N,N]`` matrix of distances if only ``x`` is given, else a ``[N,M]`` matrix + + Example: + >>> import torchapi as B + >>> from paddlemetrics.functional import pairwise_manhatten_distance + >>> x = B.tensor([[2, 3], [3, 5], [5, 8]], dtype=B.float32) + >>> y = B.tensor([[1, 0], [2, 1]], dtype=B.float32) + >>> pairwise_manhatten_distance(x, y) + tensor([[ 4., 2.], + [ 7., 5.], + [12., 10.]]) + >>> pairwise_manhatten_distance(x) + tensor([[0., 3., 8.], + [3., 0., 5.], + [8., 5., 0.]]) + + """ + distance = _pairwise_manhatten_distance_update(x, y, zero_diagonal) + return _reduce_distance_matrix(distance, reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/__init__.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/__init__.py new file mode 100644 index 00000000..7ddc6040 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/__init__.py @@ -0,0 +1,27 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddlemetrics.functional.image.psnr import psnr # noqa: F401 +from paddlemetrics.functional.image.ssim import ssim # noqa: F401 +from paddlemetrics.functional.regression.cosine_similarity import cosine_similarity # noqa: F401 +from paddlemetrics.functional.regression.explained_variance import explained_variance # noqa: F401 +from paddlemetrics.functional.regression.mean_absolute_error import mean_absolute_error # noqa: F401 +from paddlemetrics.functional.regression.mean_absolute_percentage_error import ( # noqa: F401 + mean_absolute_percentage_error, +) +from paddlemetrics.functional.regression.mean_squared_error import mean_squared_error # noqa: F401 +from paddlemetrics.functional.regression.mean_squared_log_error import mean_squared_log_error # noqa: F401 +from paddlemetrics.functional.regression.pearson import pearson_corrcoef # noqa: F401 +from paddlemetrics.functional.regression.r2 import r2_score # noqa: F401 +from paddlemetrics.functional.regression.spearman import spearman_corrcoef # noqa: F401 +from paddlemetrics.functional.regression.tweedie_deviance import tweedie_deviance_score # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/cosine_similarity.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/cosine_similarity.py new file mode 100644 index 00000000..ea0f77a3 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/cosine_similarity.py @@ -0,0 +1,98 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _cosine_similarity_update( + preds: Tensor, + target: Tensor, +) -> Tuple[Tensor, Tensor]: + """Updates and returns variables required to compute Cosine Similarity. Checks for same shape of input tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + """ + + _check_same_shape(preds, target) + preds = preds.float() + target = target.float() + + return preds, target + + +def _cosine_similarity_compute(preds: Tensor, target: Tensor, reduction: str = "sum") -> Tensor: + """Computes Cosine Similarity. + + Args: + preds: Predicted tensor + target: Ground truth tensor + reduction: + The method of reducing along the batch dimension using sum, mean or taking the individual scores + + Example: + >>> target = B.tensor([[1, 2, 3, 4], [1, 2, 3, 4]]) + >>> preds = B.tensor([[1, 2, 3, 4], [-1, -2, -3, -4]]) + >>> preds, target = _cosine_similarity_update(preds, target) + >>> _cosine_similarity_compute(preds, target, 'none') + tensor([ 1.0000, -1.0000]) + """ + + dot_product = (preds * target).sum(dim=-1) + preds_norm = preds.norm(dim=-1) + target_norm = target.norm(dim=-1) + similarity = dot_product / (preds_norm * target_norm) + reduction_mapping = { + "sum": B.sum, + "mean": B.mean, + "none": lambda x: x, + None: lambda x: x, + } + return reduction_mapping[reduction](similarity) + + +def cosine_similarity(preds: Tensor, target: Tensor, reduction: str = "sum") -> Tensor: + r""" + Computes the `Cosine Similarity`_ + between targets and predictions: + + .. math:: + cos_{sim}(x,y) = \frac{x \cdot y}{||x|| \cdot ||y||} = + \frac{\sum_{i=1}^n x_i y_i}{\sqrt{\sum_{i=1}^n x_i^2}\sqrt{\sum_{i=1}^n y_i^2}} + + where :math:`y` is a tensor of target values, and :math:`x` is a tensor of predictions. + + Args: + preds: Predicted tensor with shape ``(N,d)`` + target: Ground truth tensor with shape ``(N,d)`` + reduction: + The method of reducing along the batch dimension using sum, mean or taking the individual scores + + Example: + >>> from paddlemetrics.functional.regression import cosine_similarity + >>> target = B.tensor([[1, 2, 3, 4], + ... [1, 2, 3, 4]]) + >>> preds = B.tensor([[1, 2, 3, 4], + ... [-1, -2, -3, -4]]) + >>> cosine_similarity(preds, target, 'none') + tensor([ 1.0000, -1.0000]) + + """ + preds, target = _cosine_similarity_update(preds, target) + return _cosine_similarity_compute(preds, target, reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/explained_variance.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/explained_variance.py new file mode 100644 index 00000000..95ef6acf --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/explained_variance.py @@ -0,0 +1,137 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Sequence, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _explained_variance_update(preds: Tensor, target: Tensor) -> Tuple[int, Tensor, Tensor, Tensor, Tensor]: + """Updates and returns variables required to compute Explained Variance. Checks for same shape of input + tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + """ + + _check_same_shape(preds, target) + + n_obs = preds.size(0) + sum_error = B.sum(target - preds, dim=0) + diff = target - preds + sum_squared_error = B.sum(diff * diff, dim=0) + + sum_target = B.sum(target, dim=0) + sum_squared_target = B.sum(target * target, dim=0) + + return n_obs, sum_error, sum_squared_error, sum_target, sum_squared_target + + +def _explained_variance_compute( + n_obs: Tensor, + sum_error: Tensor, + sum_squared_error: Tensor, + sum_target: Tensor, + sum_squared_target: Tensor, + multioutput: str = "uniform_average", +) -> Tensor: + """Computes Explained Variance. + + Args: + n_obs: Number of predictions or observations + sum_error: Sum of errors over all observations + sum_squared_error: Sum of square of errors over all observations + sum_target: Sum of target values + sum_squared_target: Sum of squares of target values + multioutput: Defines aggregation in the case of multiple output scores. Can be one + of the following strings (default is `'uniform_average'`.): + + * `'raw_values'` returns full set of scores + * `'uniform_average'` scores are uniformly averaged + * `'variance_weighted'` scores are weighted by their individual variances + + Example: + >>> target = B.tensor([[0.5, 1], [-1, 1], [7, -6]]) + >>> preds = B.tensor([[0, 2], [-1, 2], [8, -5]]) + >>> n_obs, sum_error, ss_error, sum_target, ss_target = _explained_variance_update(preds, target) + >>> _explained_variance_compute(n_obs, sum_error, ss_error, sum_target, ss_target, multioutput='raw_values') + tensor([0.9677, 1.0000]) + """ + + diff_avg = sum_error / n_obs + numerator = sum_squared_error / n_obs - (diff_avg * diff_avg) + + target_avg = sum_target / n_obs + denominator = sum_squared_target / n_obs - (target_avg * target_avg) + + # Take care of division by zero + nonzero_numerator = numerator != 0 + nonzero_denominator = denominator != 0 + valid_score = nonzero_numerator & nonzero_denominator + output_scores = B.ones_like(diff_avg) + output_scores[valid_score] = 1.0 - (numerator[valid_score] / denominator[valid_score]) + output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0 + + # Decide what to do in multioutput case + # Todo: allow user to pass in tensor with weights + if multioutput == "raw_values": + return output_scores + if multioutput == "uniform_average": + return B.mean(output_scores) + if multioutput == "variance_weighted": + denom_sum = B.sum(denominator) + return B.sum(denominator / denom_sum * output_scores) + + +def explained_variance( + preds: Tensor, + target: Tensor, + multioutput: str = "uniform_average", +) -> Union[Tensor, Sequence[Tensor]]: + """Computes explained variance. 
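+
+    The score is ``1 - Var(target - preds) / Var(target)``, computed per output column and
+    then aggregated according to ``multioutput``; a score of 1 means the residuals carry no
+    variance relative to the targets.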
+ + Args: + preds: estimated labels + target: ground truth labels + multioutput: Defines aggregation in the case of multiple output scores. Can be one + of the following strings (default is `'uniform_average'`.): + + * `'raw_values'` returns full set of scores + * `'uniform_average'` scores are uniformly averaged + * `'variance_weighted'` scores are weighted by their individual variances + + Example: + >>> from paddlemetrics.functional import explained_variance + >>> target = B.tensor([3, -0.5, 2, 7]) + >>> preds = B.tensor([2.5, 0.0, 2, 8]) + >>> explained_variance(preds, target) + tensor(0.9572) + + >>> target = B.tensor([[0.5, 1], [-1, 1], [7, -6]]) + >>> preds = B.tensor([[0, 2], [-1, 2], [8, -5]]) + >>> explained_variance(preds, target, multioutput='raw_values') + tensor([0.9677, 1.0000]) + """ + n_obs, sum_error, sum_squared_error, sum_target, sum_squared_target = _explained_variance_update(preds, target) + return _explained_variance_compute( + n_obs, + sum_error, + sum_squared_error, + sum_target, + sum_squared_target, + multioutput, + ) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_absolute_error.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_absolute_error.py new file mode 100644 index 00000000..1ddb4153 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_absolute_error.py @@ -0,0 +1,73 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _mean_absolute_error_update(preds: Tensor, target: Tensor) -> Tuple[Tensor, int]: + """Updates and returns variables required to compute Mean Absolute Error. Checks for same shape of input + tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + """ + + _check_same_shape(preds, target) + sum_abs_error = B.sum(B.abs(preds - target)) + n_obs = target.numel() + return sum_abs_error, n_obs + + +def _mean_absolute_error_compute(sum_abs_error: Tensor, n_obs: int) -> Tensor: + """Computes Mean Absolute Error. + + Args: + sum_abs_error: Sum of absolute value of errors over all observations + n_obs: Number of predictions or observations + + Example: + >>> preds = B.tensor([0., 1, 2, 3]) + >>> target = B.tensor([0., 1, 2, 2]) + >>> sum_abs_error, n_obs = _mean_absolute_error_update(preds, target) + >>> _mean_absolute_error_compute(sum_abs_error, n_obs) + tensor(0.2500) + """ + + return sum_abs_error / n_obs + + +def mean_absolute_error(preds: Tensor, target: Tensor) -> Tensor: + """Computes mean absolute error. 
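+
+    ``MAE = sum(|preds - target|) / n``; in the example below only the last element differs
+    (|3 - 2| = 1 over 4 observations), giving 0.25.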
+ + Args: + preds: estimated labels + target: ground truth labels + + Return: + Tensor with MAE + + Example: + >>> from paddlemetrics.functional import mean_absolute_error + >>> x = B.tensor([0., 1, 2, 3]) + >>> y = B.tensor([0., 1, 2, 2]) + >>> mean_absolute_error(x, y) + tensor(0.2500) + """ + sum_abs_error, n_obs = _mean_absolute_error_update(preds, target) + return _mean_absolute_error_compute(sum_abs_error, n_obs) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_absolute_percentage_error.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_absolute_percentage_error.py new file mode 100644 index 00000000..862617c0 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_absolute_percentage_error.py @@ -0,0 +1,91 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _mean_absolute_percentage_error_update( + preds: Tensor, + target: Tensor, + epsilon: float = 1.17e-06, +) -> Tuple[Tensor, int]: + """Updates and returns variables required to compute Mean Percentage Error. Checks for same shape of input + tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + epsilon: Specifies the lower bound for target values. Any target value below epsilon + is set to epsilon (avoids ZeroDivisionError). default: 1.17e-06 + """ + + _check_same_shape(preds, target) + + abs_diff = B.abs(preds - target) + abs_per_error = abs_diff / B.clamp(B.abs(target), min=epsilon) + + sum_abs_per_error = B.sum(abs_per_error) + + num_obs = target.numel() + + return sum_abs_per_error, num_obs + + +def _mean_absolute_percentage_error_compute(sum_abs_per_error: Tensor, num_obs: int) -> Tensor: + """Computes Mean Absolute Percentage Error. + + Args: + sum_abs_per_error: Sum of absolute value of percentage errors over all observations + (percentage error = (target - prediction) / target) + num_obs: Number of predictions or observations + + Example: + >>> target = B.tensor([1, 10, 1e6]) + >>> preds = B.tensor([0.9, 15, 1.2e6]) + >>> sum_abs_per_error, num_obs = _mean_absolute_percentage_error_update(preds, target) + >>> _mean_absolute_percentage_error_compute(sum_abs_per_error, num_obs) + tensor(0.2667) + """ + + return sum_abs_per_error / num_obs + + +def mean_absolute_percentage_error(preds: Tensor, target: Tensor) -> Tensor: + """Computes mean absolute percentage error. + + Args: + preds: estimated labels + target: ground truth labels + + Return: + Tensor with MAPE + + Note: + The epsilon value is taken from `scikit-learn's implementation of MAPE`_. 
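+
+    For the example below the per-element percentage errors are 0.1 / 1, 5 / 10 and
+    0.2e6 / 1e6, i.e. 0.1 + 0.5 + 0.2 = 0.8, which averaged over 3 observations gives
+    roughly 0.2667.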
+ + Example: + >>> from paddlemetrics.functional import mean_absolute_percentage_error + >>> target = B.tensor([1, 10, 1e6]) + >>> preds = B.tensor([0.9, 15, 1.2e6]) + >>> mean_absolute_percentage_error(preds, target) + tensor(0.2667) + """ + sum_abs_per_error, num_obs = _mean_absolute_percentage_error_update(preds, target) + mean_ape = _mean_absolute_percentage_error_compute(sum_abs_per_error, num_obs) + + return mean_ape diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_squared_error.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_squared_error.py new file mode 100644 index 00000000..58af5d21 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_squared_error.py @@ -0,0 +1,74 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _mean_squared_error_update(preds: Tensor, target: Tensor) -> Tuple[Tensor, int]: + """Updates and returns variables required to compute Mean Squared Error. Checks for same shape of input + tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + """ + _check_same_shape(preds, target) + diff = preds - target + sum_squared_error = B.sum(diff * diff) + n_obs = target.numel() + return sum_squared_error, n_obs + + +def _mean_squared_error_compute(sum_squared_error: Tensor, n_obs: int, squared: bool = True) -> Tensor: + """Computes Mean Squared Error. + + Args: + sum_squared_error: Sum of square of errors over all observations + n_obs: Number of predictions or observations + squared: Returns RMSE value if set to False. default: True + + Example: + >>> preds = B.tensor([0., 1, 2, 3]) + >>> target = B.tensor([0., 1, 2, 2]) + >>> sum_squared_error, n_obs = _mean_squared_error_update(preds, target) + >>> _mean_squared_error_compute(sum_squared_error, n_obs) + tensor(0.2500) + """ + return sum_squared_error / n_obs if squared else B.sqrt(sum_squared_error / n_obs) + + +def mean_squared_error(preds: Tensor, target: Tensor, squared: bool = True) -> Tensor: + """Computes mean squared error. 
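+
+    ``MSE = sum((preds - target) ** 2) / n`` (its square root is returned when
+    ``squared=False``); in the example below only the last element differs, giving 1 / 4 = 0.25.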
+ + Args: + preds: estimated labels + target: ground truth labels + squared: returns RMSE value if set to False + + Return: + Tensor with MSE + + Example: + >>> from paddlemetrics.functional import mean_squared_error + >>> x = B.tensor([0., 1, 2, 3]) + >>> y = B.tensor([0., 1, 2, 2]) + >>> mean_squared_error(x, y) + tensor(0.2500) + """ + sum_squared_error, n_obs = _mean_squared_error_update(preds, target) + return _mean_squared_error_compute(sum_squared_error, n_obs, squared=squared) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_squared_log_error.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_squared_log_error.py new file mode 100644 index 00000000..7270ffc0 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/mean_squared_log_error.py @@ -0,0 +1,76 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _mean_squared_log_error_update(preds: Tensor, target: Tensor) -> Tuple[Tensor, int]: + """Returns variables required to compute Mean Squared Log Error. Checks for same shape of tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + """ + + _check_same_shape(preds, target) + sum_squared_log_error = B.sum(B.pow(B.log1p(preds) - B.log1p(target), 2)) + n_obs = target.numel() + return sum_squared_log_error, n_obs + + +def _mean_squared_log_error_compute(sum_squared_log_error: Tensor, n_obs: int) -> Tensor: + """Computes Mean Squared Log Error. + + Args: + sum_squared_log_error: Sum of square of log errors over all observations + (log error = log(target) - log(prediction)) + n_obs: Number of predictions or observations + + Example: + >>> preds = B.tensor([0., 1, 2, 3]) + >>> target = B.tensor([0., 1, 2, 2]) + >>> sum_squared_log_error, n_obs = _mean_squared_log_error_update(preds, target) + >>> _mean_squared_log_error_compute(sum_squared_log_error, n_obs) + tensor(0.0207) + """ + + return sum_squared_log_error / n_obs + + +def mean_squared_log_error(preds: Tensor, target: Tensor) -> Tensor: + """Computes mean squared log error. + + Args: + preds: estimated labels + target: ground truth labels + + Return: + Tensor with RMSLE + + Example: + >>> from paddlemetrics.functional import mean_squared_log_error + >>> x = B.tensor([0., 1, 2, 3]) + >>> y = B.tensor([0., 1, 2, 2]) + >>> mean_squared_log_error(x, y) + tensor(0.0207) + + .. 
note:: + Half precision is only support on GPU for this metric + """ + sum_squared_log_error, n_obs = _mean_squared_log_error_update(preds, target) + return _mean_squared_log_error_compute(sum_squared_log_error, n_obs) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/pearson.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/pearson.py new file mode 100644 index 00000000..e1f7dd82 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/pearson.py @@ -0,0 +1,102 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _pearson_corrcoef_update( + preds: Tensor, + target: Tensor, + mean_x: Tensor, + mean_y: Tensor, + var_x: Tensor, + var_y: Tensor, + corr_xy: Tensor, + n_prior: Tensor, +) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: + """Updates and returns variables required to compute Pearson Correlation Coefficient. Checks for same shape of + input tensors. + + Args: + mean_x: current mean estimate of x tensor + mean_y: current mean estimate of y tensor + var_x: current variance estimate of x tensor + var_y: current variance estimate of y tensor + corr_xy: current covariance estimate between x and y tensor + n_prior: current number of observed observations + """ + # Data checking + _check_same_shape(preds, target) + preds = preds.squeeze() + target = target.squeeze() + if preds.ndim > 1 or target.ndim > 1: + raise ValueError("Expected both predictions and target to be 1 dimensional tensors.") + + n_obs = preds.numel() + mx_new = (n_prior * mean_x + preds.mean() * n_obs) / (n_prior + n_obs) + my_new = (n_prior * mean_y + target.mean() * n_obs) / (n_prior + n_obs) + n_prior += n_obs + var_x += ((preds - mx_new) * (preds - mean_x)).sum() + var_y += ((target - my_new) * (target - mean_y)).sum() + corr_xy += ((preds - mx_new) * (target - mean_y)).sum() + mean_x = mx_new + mean_y = my_new + + return mean_x, mean_y, var_x, var_y, corr_xy, n_prior + + +def _pearson_corrcoef_compute( + var_x: Tensor, + var_y: Tensor, + corr_xy: Tensor, + nb: Tensor, +) -> Tensor: + """Computes the final pearson correlation based on accumulated statistics. + + Args: + var_x: variance estimate of x tensor + var_y: variance estimate of y tensor + corr_xy: covariance estimate between x and y tensor + nb: number of observations + """ + var_x /= nb - 1 + var_y /= nb - 1 + corr_xy /= nb - 1 + corrcoef = (corr_xy / (var_x * var_y).sqrt()).squeeze() + return B.clamp(corrcoef, -1.0, 1.0) + + +def pearson_corrcoef(preds: Tensor, target: Tensor) -> Tensor: + """Computes pearson correlation coefficient. 
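+
+    The means, variances and covariance are accumulated in a streaming fashion, and the final
+    coefficient is ``cov(preds, target) / (std(preds) * std(target))``, clamped to [-1, 1].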
+ + Args: + preds: estimated scores + target: ground truth scores + + Example: + >>> from paddlemetrics.functional import pearson_corrcoef + >>> target = B.tensor([3, -0.5, 2, 7]) + >>> preds = B.tensor([2.5, 0.0, 2, 8]) + >>> pearson_corrcoef(preds, target) + tensor(0.9849) + """ + _temp = B.zeros(1, dtype=preds.dtype, device=preds.device) + mean_x, mean_y, var_x = _temp.clone(), _temp.clone(), _temp.clone() + var_y, corr_xy, nb = _temp.clone(), _temp.clone(), _temp.clone() + _, _, var_x, var_y, corr_xy, nb = _pearson_corrcoef_update(preds, target, mean_x, mean_y, var_x, var_y, corr_xy, nb) + return _pearson_corrcoef_compute(var_x, var_y, corr_xy, nb) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/r2.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/r2.py new file mode 100644 index 00000000..a8321912 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/r2.py @@ -0,0 +1,173 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities import rank_zero_warn +from paddlemetrics.utilities.checks import _check_same_shape + + +def _r2_score_update(preds: Tensor, target: Tensor) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + """Updates and returns variables required to compute R2 score. Checks for same shape and 1D/2D input tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + """ + + _check_same_shape(preds, target) + if preds.ndim > 2: + raise ValueError( + "Expected both prediction and target to be 1D or 2D tensors," + f" but received tensors with dimension {preds.shape}" + ) + + sum_obs = B.sum(target, dim=0) + sum_squared_obs = B.sum(target * target, dim=0) + residual = target - preds + rss = B.sum(residual * residual, dim=0) + n_obs = target.size(0) + + return sum_squared_obs, sum_obs, rss, n_obs + + +def _r2_score_compute( + sum_squared_obs: Tensor, + sum_obs: Tensor, + rss: Tensor, + n_obs: Tensor, + adjusted: int = 0, + multioutput: str = "uniform_average", +) -> Tensor: + """Computes R2 score. + + Args: + sum_squared_obs: Sum of square of all observations + sum_obs: Sum of all observations + rss: Residual sum of squares + n_obs: Number of predictions or observations + adjusted: number of independent regressors for calculating adjusted r2 score. + Default 0 (standard r2 score). + multioutput: Defines aggregation in the case of multiple output scores. 
Can be one + of the following strings (default is `'uniform_average'`.): + + * `'raw_values'` returns full set of scores + * `'uniform_average'` scores are uniformly averaged + * `'variance_weighted'` scores are weighted by their individual variances + + Example: + >>> target = B.tensor([[0.5, 1], [-1, 1], [7, -6]]) + >>> preds = B.tensor([[0, 2], [-1, 2], [8, -5]]) + >>> sum_squared_obs, sum_obs, rss, n_obs = _r2_score_update(preds, target) + >>> _r2_score_compute(sum_squared_obs, sum_obs, rss, n_obs, multioutput="raw_values") + tensor([0.9654, 0.9082]) + """ + if n_obs < 2: + raise ValueError("Needs at least two samples to calculate r2 score.") + + mean_obs = sum_obs / n_obs + tss = sum_squared_obs - sum_obs * mean_obs + raw_scores = 1 - (rss / tss) + + if multioutput == "raw_values": + r2 = raw_scores + elif multioutput == "uniform_average": + r2 = B.mean(raw_scores) + elif multioutput == "variance_weighted": + tss_sum = B.sum(tss) + r2 = B.sum(tss / tss_sum * raw_scores) + else: + raise ValueError( + "Argument `multioutput` must be either `raw_values`," + f" `uniform_average` or `variance_weighted`. Received {multioutput}." + ) + + if adjusted < 0 or not isinstance(adjusted, int): + raise ValueError("`adjusted` parameter should be an integer larger or" " equal to 0.") + + if adjusted != 0: + if adjusted > n_obs - 1: + rank_zero_warn( + "More independent regressions than data points in" + " adjusted r2 score. Falls back to standard r2 score.", + UserWarning, + ) + elif adjusted == n_obs - 1: + rank_zero_warn("Division by zero in adjusted r2 score. Falls back to" " standard r2 score.", UserWarning) + else: + r2 = 1 - (1 - r2) * (n_obs - 1) / (n_obs - adjusted - 1) + return r2 + + +def r2_score( + preds: Tensor, + target: Tensor, + adjusted: int = 0, + multioutput: str = "uniform_average", +) -> Tensor: + r""" + Computes r2 score also known as `R2 Score_Coefficient Determination`_: + + .. math:: R^2 = 1 - \frac{SS_{res}}{SS_{tot}} + + where :math:`SS_{res}=\sum_i (y_i - f(x_i))^2` is the sum of residual squares, and + :math:`SS_{tot}=\sum_i (y_i - \bar{y})^2` is total sum of squares. Can also calculate + adjusted r2 score given by + + .. math:: R^2_{adj} = 1 - \frac{(1-R^2)(n-1)}{n-k-1} + + where the parameter :math:`k` (the number of independent regressors) should + be provided as the ``adjusted`` argument. + + Args: + preds: estimated labels + target: ground truth labels + adjusted: number of independent regressors for calculating adjusted r2 score. + Default 0 (standard r2 score). + multioutput: Defines aggregation in the case of multiple output scores. Can be one + of the following strings (default is ``'uniform_average'``.): + + * ``'raw_values'`` returns full set of scores + * ``'uniform_average'`` scores are uniformly averaged + * ``'variance_weighted'`` scores are weighted by their individual variances + + Raises: + ValueError: + If both ``preds`` and ``targets`` are not ``1D`` or ``2D`` tensors. + ValueError: + If ``len(preds)`` is less than ``2`` + since at least ``2`` sampels are needed to calculate r2 score. + ValueError: + If ``multioutput`` is not one of ``raw_values``, + ``uniform_average`` or ``variance_weighted``. + ValueError: + If ``adjusted`` is not an ``integer`` greater than ``0``. 
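+
+    For the first example below: ``SS_res = 0.25 + 0.25 + 0 + 1 = 1.5`` and, with a target
+    mean of 2.875, ``SS_tot = 29.1875``, so ``R^2 = 1 - 1.5 / 29.1875 ≈ 0.9486``.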
+ + Example: + >>> from paddlemetrics.functional import r2_score + >>> target = B.tensor([3, -0.5, 2, 7]) + >>> preds = B.tensor([2.5, 0.0, 2, 8]) + >>> r2_score(preds, target) + tensor(0.9486) + + >>> target = B.tensor([[0.5, 1], [-1, 1], [7, -6]]) + >>> preds = B.tensor([[0, 2], [-1, 2], [8, -5]]) + >>> r2_score(preds, target, multioutput='raw_values') + tensor([0.9654, 0.9082]) + + """ + sum_squared_obs, sum_obs, rss, n_obs = _r2_score_update(preds, target) + return _r2_score_compute(sum_squared_obs, sum_obs, rss, n_obs, adjusted, multioutput) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/spearman.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/spearman.py new file mode 100644 index 00000000..62f7a9d4 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/spearman.py @@ -0,0 +1,129 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _find_repeats(data: Tensor) -> Tensor: + """find and return values which have repeats i.e. the same value are more than once in the tensor.""" + temp = data.detach().clone() + temp = temp.sort()[0] + + change = B.cat([B.tensor([True], device=temp.device), temp[1:] != temp[:-1]]) + unique = temp[change] + change_idx = B.cat([B.nonzero(change), B.tensor([[temp.numel()]], device=temp.device)]).flatten() + freq = change_idx[1:] - change_idx[:-1] + atleast2 = freq > 1 + return unique[atleast2] + + +def _rank_data(data: Tensor) -> Tensor: + """Calculate the rank for each element of a tensor. The rank refers to the indices of an element in the + corresponding sorted tensor (starting from 1). Duplicates of the same value will be assigned the mean of their + rank. + + Adopted from: `Rank of element tensor`_ + """ + n = data.numel() + rank = B.empty_like(data) + idx = data.argsort() + rank[idx[:n]] = B.arange(1, n + 1, dtype=data.dtype, device=data.device) + + repeats = _find_repeats(data) + for r in repeats: + condition = data == r + rank[condition] = rank[condition].mean() + return rank + + +def _spearman_corrcoef_update(preds: Tensor, target: Tensor) -> Tuple[Tensor, Tensor]: + """Updates and returns variables required to compute Spearman Correlation Coefficient. Checks for same shape + and type of input tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + """ + + if preds.dtype != target.dtype: + raise TypeError( + "Expected `preds` and `target` to have the same data type." + f" Got preds: {preds.dtype} and target: {target.dtype}." 
+ ) + _check_same_shape(preds, target) + preds = preds.squeeze() + target = target.squeeze() + if preds.ndim > 1 or target.ndim > 1: + raise ValueError("Expected both predictions and target to be 1 dimensional tensors.") + return preds, target + + +def _spearman_corrcoef_compute(preds: Tensor, target: Tensor, eps: float = 1e-6) -> Tensor: + """Computes Spearman Correlation Coefficient. + + Args: + preds: Predicted tensor + target: Ground truth tensor + eps: Avoids ZeroDivisionError. default: 1e-6 + + Example: + >>> target = B.tensor([3, -0.5, 2, 7]) + >>> preds = B.tensor([2.5, 0.0, 2, 8]) + >>> preds, target = _spearman_corrcoef_update(preds, target) + >>> _spearman_corrcoef_compute(preds, target) + tensor(1.0000) + """ + + preds = _rank_data(preds) + target = _rank_data(target) + + preds_diff = preds - preds.mean() + target_diff = target - target.mean() + + cov = (preds_diff * target_diff).mean() + preds_std = B.sqrt((preds_diff * preds_diff).mean()) + target_std = B.sqrt((target_diff * target_diff).mean()) + + corrcoef = cov / (preds_std * target_std + eps) + return B.clamp(corrcoef, -1.0, 1.0) + + +def spearman_corrcoef(preds: Tensor, target: Tensor) -> Tensor: + r""" + Computes `spearmans rank correlation coefficient`_: + + .. math: + r_s = = \frac{cov(rg_x, rg_y)}{\sigma_{rg_x} * \sigma_{rg_y}} + + where :math:`rg_x` and :math:`rg_y` are the rank associated to the variables x and y. Spearmans correlations + coefficient corresponds to the standard pearsons correlation coefficient calculated on the rank variables. + + Args: + preds: estimated scores + target: ground truth scores + + Example: + >>> from paddlemetrics.functional import spearman_corrcoef + >>> target = B.tensor([3, -0.5, 2, 7]) + >>> preds = B.tensor([2.5, 0.0, 2, 8]) + >>> spearman_corrcoef(preds, target) + tensor(1.0000) + + """ + preds, target = _spearman_corrcoef_update(preds, target) + return _spearman_corrcoef_compute(preds, target) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/symmetric_mean_absolute_percentage_error.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/symmetric_mean_absolute_percentage_error.py new file mode 100644 index 00000000..89eadf9e --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/symmetric_mean_absolute_percentage_error.py @@ -0,0 +1,99 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _symmetric_mean_absolute_percentage_error_update( + preds: Tensor, + target: Tensor, + epsilon: float = 1.17e-06, +) -> Tuple[Tensor, int]: + """Updates and returns variables required to compute Symmetric Mean Absolute Percentage Error. Checks for same + shape of input tensors. + + Args: + preds: Predicted tensor + target: Ground truth tensor + epsilon: Avoids ZeroDivisionError. 
default: 1.17e-06 + """ + + _check_same_shape(preds, target) + + abs_diff = B.abs(preds - target) + abs_per_error = abs_diff / B.clamp(B.abs(target) + B.abs(preds), min=epsilon) + + sum_abs_per_error = 2 * B.sum(abs_per_error) + + num_obs = target.numel() + + return sum_abs_per_error, num_obs + + +def _symmetric_mean_absolute_percentage_error_compute(sum_abs_per_error: Tensor, num_obs: int) -> Tensor: + """Computes Symmetric Mean Absolute Percentage Error. + + Args: + sum_abs_per_error: Sum of values of symmetric absolute percentage errors over all observations + (symmetric absolute percentage error = 2 * |target - prediction| / (target + prediction)) + num_obs: Number of predictions or observations + + Example: + >>> target = B.tensor([1, 10, 1e6]) + >>> preds = B.tensor([0.9, 15, 1.2e6]) + >>> sum_abs_per_error, num_obs = _symmetric_mean_absolute_percentage_error_update(preds, target) + >>> _symmetric_mean_absolute_percentage_error_compute(sum_abs_per_error, num_obs) + tensor(0.2290) + """ + + return sum_abs_per_error / num_obs + + +def symmetric_mean_absolute_percentage_error(preds: Tensor, target: Tensor) -> Tensor: + r""" + Computes symmetric mean absolute percentage error (SMAPE_): + + .. math:: \text{SMAPE} = \frac{2}{n}\sum_1^n\frac{max(| y_i - \hat{y_i} |}{| y_i | + | \hat{y_i} |, \epsilon)} + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a tensor of predictions. + + Args: + preds: estimated labels + target: ground truth labels + + Return: + Tensor with SMAPE. + + Example: + >>> from paddlemetrics.functional import symmetric_mean_absolute_percentage_error + >>> target = B.tensor([1, 10, 1e6]) + >>> preds = B.tensor([0.9, 15, 1.2e6]) + >>> symmetric_mean_absolute_percentage_error(preds, target) + tensor(0.2290) + + """ + sum_abs_per_error, num_obs = _symmetric_mean_absolute_percentage_error_update( + preds, + target, + ) + mean_ape = _symmetric_mean_absolute_percentage_error_compute( + sum_abs_per_error, + num_obs, + ) + + return mean_ape diff --git a/RE/paddlemetric/src/paddlemetrics/functional/regression/tweedie_deviance.py b/RE/paddlemetric/src/paddlemetrics/functional/regression/tweedie_deviance.py new file mode 100644 index 00000000..7cb366a2 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/regression/tweedie_deviance.py @@ -0,0 +1,139 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_same_shape + + +def _tweedie_deviance_score_update(preds: Tensor, targets: Tensor, power: float = 0.0) -> Tuple[Tensor, Tensor]: + """Updates and returns variables required to compute Deviance Score for the given power. Checks for same shape + of input tensors. 
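+    The per-element deviance is evaluated in closed form for ``power`` equal to 0 (squared error),
+    1 (Poisson) and 2 (Gamma), and with the general Tweedie formula otherwise.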
+ + Args: + preds: Predicted tensor + targets: Ground truth tensor + power: see :func:`tweedie_deviance_score` + + Example: + >>> targets = B.tensor([1.0, 2.0, 3.0, 4.0]) + >>> preds = B.tensor([4.0, 3.0, 2.0, 1.0]) + >>> _tweedie_deviance_score_update(preds, targets, power=2) + (tensor(4.8333), tensor(4)) + """ + _check_same_shape(preds, targets) + + zero_tensor = B.zeros(preds.shape, device=preds.device) + + if 0 < power < 1: + raise ValueError(f"Deviance Score is not defined for power={power}.") + + if power == 0: + deviance_score = B.pow(targets - preds, exponent=2) + elif power == 1: + # Poisson distribution + if B.any(preds <= 0) or B.any(targets < 0): + raise ValueError( + f"For power={power}, 'preds' has to be strictly positive and 'targets' cannot be negative." + ) + + deviance_score = 2 * (targets * B.log(targets / preds) + preds - targets) + elif power == 2: + # Gamma distribution + if B.any(preds <= 0) or B.any(targets <= 0): + raise ValueError(f"For power={power}, both 'preds' and 'targets' have to be strictly positive.") + + deviance_score = 2 * (B.log(preds / targets) + (targets / preds) - 1) + else: + if power < 0: + if B.any(preds <= 0): + raise ValueError(f"For power={power}, 'preds' has to be strictly positive.") + elif 1 < power < 2: + if B.any(preds <= 0) or B.any(targets < 0): + raise ValueError( + f"For power={power}, 'targets' has to be strictly positive and 'preds' cannot be negative." + ) + else: + if B.any(preds <= 0) or B.any(targets <= 0): + raise ValueError(f"For power={power}, both 'preds' and 'targets' have to be strictly positive.") + + term_1 = B.pow(B.max(targets, zero_tensor), 2 - power) / ((1 - power) * (2 - power)) + term_2 = targets * B.pow(preds, 1 - power) / (1 - power) + term_3 = B.pow(preds, 2 - power) / (2 - power) + deviance_score = 2 * (term_1 - term_2 + term_3) + + sum_deviance_score = B.sum(deviance_score) + num_observations = B.tensor(B.numel(deviance_score), device=preds.device) + + return sum_deviance_score, num_observations + + +def _tweedie_deviance_score_compute(sum_deviance_score: Tensor, num_observations: Tensor) -> Tensor: + """Computes Deviance Score. + + Args: + sum_deviance_score: Sum of deviance scores accumalated until now. + num_observations: Number of observations encountered until now. + + Example: + >>> targets = B.tensor([1.0, 2.0, 3.0, 4.0]) + >>> preds = B.tensor([4.0, 3.0, 2.0, 1.0]) + >>> sum_deviance_score, num_observations = _tweedie_deviance_score_update(preds, targets, power=2) + >>> _tweedie_deviance_score_compute(sum_deviance_score, num_observations) + tensor(1.2083) + """ + + return sum_deviance_score / num_observations + + +def tweedie_deviance_score(preds: Tensor, targets: Tensor, power: float = 0.0) -> Tensor: + r""" + Computes the `Tweedie Deviance Score`_ between targets and predictions: + + .. math:: + deviance\_score(\hat{y},y) = + \begin{cases} + (\hat{y} - y)^2, & \text{for }power=0\\ + 2 * (y * log(\frac{y}{\hat{y}}) + \hat{y} - y), & \text{for }power=1\\ + 2 * (log(\frac{\hat{y}}{y}) + \frac{y}{\hat{y}} - 1), & \text{for }power=2\\ + 2 * (\frac{(max(y,0))^{2}}{(1 - power)(2 - power)} - \frac{y(\hat{y})^{1 - power}}{1 - power} + \frac{(\hat{y}) + ^{2 - power}}{2 - power}), & \text{otherwise} + \end{cases} + + where :math:`y` is a tensor of targets values, and :math:`\hat{y}` is a tensor of predictions. + + Args: + preds: Predicted tensor with shape ``(N,...)`` + targets: Ground truth tensor with shape ``(N,...)`` + power: + - power < 0 : Extreme stable distribution. (Requires: preds > 0.) 
+ - power = 0 : Normal distribution. (Requires: targets and preds can be any real numbers.) + - power = 1 : Poisson distribution. (Requires: targets >= 0 and y_pred > 0.) + - 1 < p < 2 : Compound Poisson distribution. (Requires: targets >= 0 and preds > 0.) + - power = 2 : Gamma distribution. (Requires: targets > 0 and preds > 0.) + - power = 3 : Inverse Gaussian distribution. (Requires: targets > 0 and preds > 0.) + - otherwise : Positive stable distribution. (Requires: targets > 0 and preds > 0.) + + Example: + >>> from paddlemetrics.functional import tweedie_deviance_score + >>> targets = B.tensor([1.0, 2.0, 3.0, 4.0]) + >>> preds = B.tensor([4.0, 3.0, 2.0, 1.0]) + >>> tweedie_deviance_score(preds, targets, power=2) + tensor(1.2083) + + """ + sum_deviance_score, num_observations = _tweedie_deviance_score_update(preds, targets, power=power) + return _tweedie_deviance_score_compute(sum_deviance_score, num_observations) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/retrieval/__init__.py b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/__init__.py new file mode 100644 index 00000000..d05abb6a --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/__init__.py @@ -0,0 +1,22 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddlemetrics.functional.retrieval.average_precision import retrieval_average_precision # noqa: F401 +from paddlemetrics.functional.retrieval.fall_out import retrieval_fall_out # noqa: F401 +from paddlemetrics.functional.retrieval.hit_rate import retrieval_hit_rate # noqa: F401 +from paddlemetrics.functional.retrieval.ndcg import retrieval_normalized_dcg # noqa: F401 +from paddlemetrics.functional.retrieval.precision import retrieval_precision # noqa: F401 +from paddlemetrics.functional.retrieval.r_precision import retrieval_r_precision # noqa: F401 +from paddlemetrics.functional.retrieval.recall import retrieval_recall # noqa: F401 +from paddlemetrics.functional.retrieval.reciprocal_rank import retrieval_reciprocal_rank # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/functional/retrieval/average_precision.py b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/average_precision.py new file mode 100644 index 00000000..0b067a89 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/average_precision.py @@ -0,0 +1,49 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
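+# The function below computes average precision over the ranking induced by ``preds``.
+# Worked example (the values match the doctest in ``retrieval_average_precision``):
+#
+#     preds  = [0.2, 0.3, 0.5]        # predicted relevance scores
+#     target = [True, False, True]    # ground-truth relevance
+#
+# Sorting by score gives targets [True, False, True], so the relevant documents sit at
+# ranks 1 and 3; precision at those ranks is 1/1 and 2/3, and AP = (1/1 + 2/3) / 2 = 0.8333.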
+import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.checks import _check_retrieval_functional_inputs + + +def retrieval_average_precision(preds: Tensor, target: Tensor) -> Tensor: + """Computes average precision (for information retrieval), as explained in `IR Average precision`_. + + ``preds`` and ``target`` should be of the same shape and live on the same device. If no ``target`` is ``True``, + ``0`` is returned. ``target`` must be either `bool` or `integers` and ``preds`` must be `float`, + otherwise an error is raised. + + Args: + preds: estimated probabilities of each document to be relevant. + target: ground truth about each document being relevant or not. + + Return: + a single-value tensor with the average precision (AP) of the predictions ``preds`` w.r.t. the labels ``target``. + + Example: + >>> from paddlemetrics.functional import retrieval_average_precision + >>> preds = tensor([0.2, 0.3, 0.5]) + >>> target = tensor([True, False, True]) + >>> retrieval_average_precision(preds, target) + tensor(0.8333) + """ + preds, target = _check_retrieval_functional_inputs(preds, target) + + if not target.sum(): + return tensor(0.0, device=preds.device) + + target = target[B.argsort(preds, dim=-1, descending=True)] + positions = B.arange(1, len(target) + 1, device=target.device, dtype=B.float32)[target > 0] + res = B.div((B.arange(len(positions), device=positions.device, dtype=B.float32) + 1), positions).mean() + return res diff --git a/RE/paddlemetric/src/paddlemetrics/functional/retrieval/fall_out.py b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/fall_out.py new file mode 100644 index 00000000..10c5762b --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/fall_out.py @@ -0,0 +1,62 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.checks import _check_retrieval_functional_inputs + + +def retrieval_fall_out(preds: Tensor, target: Tensor, k: Optional[int] = None) -> Tensor: + """Computes the Fall-out (for information retrieval), as explained in `IR Fall-out`_ Fall-out is the fraction + of non-relevant documents retrieved among all the non-relevant documents. + + ``preds`` and ``target`` should be of the same shape and live on the same device. If no ``target`` is ``True``, + ``0`` is returned. ``target`` must be either `bool` or `integers` and ``preds`` must be `float`, + otherwise an error is raised. If you want to measure Fall-out@K, ``k`` must be a positive integer. + + Args: + preds: estimated probabilities of each document to be relevant. + target: ground truth about each document being relevant or not. + k: consider only the top k elements (default: None, which considers them all) + + Returns: + a single-value tensor with the fall-out (at ``k``) of the predictions ``preds`` w.r.t. the labels ``target``. 
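+        The score is the number of non-relevant documents among the top ``k`` (after sorting by
+        ``preds`` in descending order) divided by the total number of non-relevant documents.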
+ + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> from paddlemetrics.functional import retrieval_fall_out + >>> preds = tensor([0.2, 0.3, 0.5]) + >>> target = tensor([True, False, True]) + >>> retrieval_fall_out(preds, target, k=2) + tensor(1.) + """ + preds, target = _check_retrieval_functional_inputs(preds, target) + + k = preds.shape[-1] if k is None else k + + if not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + + target = 1 - target + + if not target.sum(): + return tensor(0.0, device=preds.device) + + relevant = target[B.argsort(preds, dim=-1, descending=True)][:k].sum().float() + return relevant / target.sum() diff --git a/RE/paddlemetric/src/paddlemetrics/functional/retrieval/hit_rate.py b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/hit_rate.py new file mode 100644 index 00000000..83336a50 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/hit_rate.py @@ -0,0 +1,57 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.checks import _check_retrieval_functional_inputs + + +def retrieval_hit_rate(preds: Tensor, target: Tensor, k: Optional[int] = None) -> Tensor: + """Computes the hit rate (for information retrieval). The hit rate is 1.0 if there is at least one relevant + document among all the top `k` retrieved documents. + + ``preds`` and ``target`` should be of the same shape and live on the same device. If no ``target`` is ``True``, + ``0`` is returned. ``target`` must be either `bool` or `integers` and ``preds`` must be `float`, + otherwise an error is raised. If you want to measure HitRate@K, ``k`` must be a positive integer. + + Args: + preds: estimated probabilities of each document to be relevant. + target: ground truth about each document being relevant or not. + k: consider only the top k elements (default: None, which considers them all) + + Returns: + a single-value tensor with the hit rate (at ``k``) of the predictions ``preds`` w.r.t. the labels ``target``. + + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> preds = tensor([0.2, 0.3, 0.5]) + >>> target = tensor([True, False, True]) + >>> retrieval_hit_rate(preds, target, k=2) + tensor(1.) 
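+        In this example the two highest-scoring documents are the ones scored 0.5 and 0.3;
+        the former is relevant, so the hit rate at ``k=2`` is 1.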
+ """ + preds, target = _check_retrieval_functional_inputs(preds, target) + + if k is None: + k = preds.shape[-1] + + if not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + + relevant = target[B.argsort(preds, dim=-1, descending=True)][:k].sum() + return (relevant > 0).float() diff --git a/RE/paddlemetric/src/paddlemetrics/functional/retrieval/ndcg.py b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/ndcg.py new file mode 100644 index 00000000..73fedad5 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/ndcg.py @@ -0,0 +1,72 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.checks import _check_retrieval_functional_inputs + + +def _dcg(target: Tensor) -> Tensor: + """Computes Discounted Cumulative Gain for input tensor.""" + denom = B.log2(B.arange(target.shape[-1], device=target.device) + 2.0) + return (target / denom).sum(dim=-1) + + +def retrieval_normalized_dcg(preds: Tensor, target: Tensor, k: Optional[int] = None) -> Tensor: + """Computes `Normalized Discounted Cumulative Gain`_ (for information retrieval). + + ``preds`` and ``target`` should be of the same shape and live on the same device. + ``target`` must be either `bool` or `integers` and ``preds`` must be `float`, + otherwise an error is raised. + + Args: + preds: estimated probabilities of each document to be relevant. + target: ground truth about each document relevance. + k: consider only the top k elements (default: None, which considers them all) + + Return: + a single-value tensor with the nDCG of the predictions ``preds`` w.r.t. the labels ``target``. 
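+        The value is the DCG of the top ``k`` documents ranked by ``preds`` divided by the DCG of
+        the ideal ranking (documents sorted by true relevance); if the ideal DCG is zero, the score is 0.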
+ + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> from paddlemetrics.functional import retrieval_normalized_dcg + >>> preds = B.tensor([.1, .2, .3, 4, 70]) + >>> target = B.tensor([10, 0, 0, 1, 5]) + >>> retrieval_normalized_dcg(preds, target) + tensor(0.6957) + """ + preds, target = _check_retrieval_functional_inputs(preds, target, allow_non_binary_target=True) + + k = preds.shape[-1] if k is None else k + + if not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + + sorted_target = target[B.argsort(preds, dim=-1, descending=True)][:k] + ideal_target = B.sort(target, descending=True)[0][:k] + + ideal_dcg = _dcg(ideal_target) + target_dcg = _dcg(sorted_target) + + # filter undefined scores + all_irrelevant = ideal_dcg == 0 + target_dcg[all_irrelevant] = 0 + target_dcg[~all_irrelevant] /= ideal_dcg[~all_irrelevant] + + return target_dcg.mean() diff --git a/RE/paddlemetric/src/paddlemetrics/functional/retrieval/precision.py b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/precision.py new file mode 100644 index 00000000..83bd1172 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/precision.py @@ -0,0 +1,60 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.checks import _check_retrieval_functional_inputs + + +def retrieval_precision(preds: Tensor, target: Tensor, k: Optional[int] = None) -> Tensor: + """Computes the precision metric (for information retrieval). Precision is the fraction of relevant documents + among all the retrieved documents. + + ``preds`` and ``target`` should be of the same shape and live on the same device. If no ``target`` is ``True``, + ``0`` is returned. ``target`` must be either `bool` or `integers` and ``preds`` must be `float`, + otherwise an error is raised. If you want to measure Precision@K, ``k`` must be a positive integer. + + Args: + preds: estimated probabilities of each document to be relevant. + target: ground truth about each document being relevant or not. + k: consider only the top k elements (default: None, which considers them all) + + Returns: + a single-value tensor with the precision (at ``k``) of the predictions ``preds`` w.r.t. the labels ``target``. 
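+        The denominator is ``k`` itself, i.e. Precision@k counts the relevant documents among the
+        top ``k`` ranked by ``preds`` and divides that count by ``k``.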
+ + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> preds = tensor([0.2, 0.3, 0.5]) + >>> target = tensor([True, False, True]) + >>> retrieval_precision(preds, target, k=2) + tensor(0.5000) + """ + preds, target = _check_retrieval_functional_inputs(preds, target) + + if k is None: + k = preds.shape[-1] + + if not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + + if not target.sum(): + return tensor(0.0, device=preds.device) + + relevant = target[B.argsort(preds, dim=-1, descending=True)][:k].sum().float() + return relevant / k diff --git a/RE/paddlemetric/src/paddlemetrics/functional/retrieval/r_precision.py b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/r_precision.py new file mode 100644 index 00000000..d26e32f8 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/r_precision.py @@ -0,0 +1,49 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.checks import _check_retrieval_functional_inputs + + +def retrieval_r_precision(preds: Tensor, target: Tensor) -> Tensor: + """Computes the r-precision metric (for information retrieval). R-Precision is the fraction of relevant + documents among all the top ``k`` retrieved documents where ``k`` is equal to the total number of relevant + documents. + + ``preds`` and ``target`` should be of the same shape and live on the same device. If no ``target`` is ``True``, + ``0`` is returned. ``target`` must be either `bool` or `integers` and ``preds`` must be `float`, + otherwise an error is raised. If you want to measure Precision@K, ``k`` must be a positive integer. + + Args: + preds: estimated probabilities of each document to be relevant. + target: ground truth about each document being relevant or not. + + Returns: + a single-value tensor with the r-precision of the predictions ``preds`` w.r.t. the labels ``target``. + + Example: + >>> preds = tensor([0.2, 0.3, 0.5]) + >>> target = tensor([True, False, True]) + >>> retrieval_r_precision(preds, target) + tensor(0.5000) + """ + preds, target = _check_retrieval_functional_inputs(preds, target) + + relevant_number = target.sum() + if not relevant_number: + return tensor(0.0, device=preds.device) + + relevant = target[B.argsort(preds, dim=-1, descending=True)][:relevant_number].sum().float() + return relevant / relevant_number diff --git a/RE/paddlemetric/src/paddlemetrics/functional/retrieval/recall.py b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/recall.py new file mode 100644 index 00000000..e00d450c --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/recall.py @@ -0,0 +1,61 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.checks import _check_retrieval_functional_inputs + + +def retrieval_recall(preds: Tensor, target: Tensor, k: Optional[int] = None) -> Tensor: + """Computes the recall metric (for information retrieval). Recall is the fraction of relevant documents + retrieved among all the relevant documents. + + ``preds`` and ``target`` should be of the same shape and live on the same device. If no ``target`` is ``True``, + ``0`` is returned. ``target`` must be either `bool` or `integers` and ``preds`` must be `float`, + otherwise an error is raised. If you want to measure Recall@K, ``k`` must be a positive integer. + + Args: + preds: estimated probabilities of each document to be relevant. + target: ground truth about each document being relevant or not. + k: consider only the top k elements (default: None, which considers them all) + + Returns: + a single-value tensor with the recall (at ``k``) of the predictions ``preds`` w.r.t. the labels ``target``. + + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> from paddlemetrics.functional import retrieval_recall + >>> preds = tensor([0.2, 0.3, 0.5]) + >>> target = tensor([True, False, True]) + >>> retrieval_recall(preds, target, k=2) + tensor(0.5000) + """ + preds, target = _check_retrieval_functional_inputs(preds, target) + + if k is None: + k = preds.shape[-1] + + if not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + + if not target.sum(): + return tensor(0.0, device=preds.device) + + relevant = target[B.argsort(preds, dim=-1, descending=True)][:k].sum().float() + return relevant / target.sum() diff --git a/RE/paddlemetric/src/paddlemetrics/functional/retrieval/reciprocal_rank.py b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/reciprocal_rank.py new file mode 100644 index 00000000..c92c223e --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/retrieval/reciprocal_rank.py @@ -0,0 +1,49 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.checks import _check_retrieval_functional_inputs + + +def retrieval_reciprocal_rank(preds: Tensor, target: Tensor) -> Tensor: + """Computes reciprocal rank (for information retrieval). See `Mean Reciprocal Rank`_ + + ``preds`` and ``target`` should be of the same shape and live on the same device. 
If no ``target`` is ``True``, + 0 is returned. ``target`` must be either `bool` or `integers` and ``preds`` must be `float`, + otherwise an error is raised. + + Args: + preds: estimated probabilities of each document to be relevant. + target: ground truth about each document being relevant or not. + + Return: + a single-value tensor with the reciprocal rank (RR) of the predictions ``preds`` wrt the labels ``target``. + + Example: + >>> from paddlemetrics.functional import retrieval_reciprocal_rank + >>> preds = B.tensor([0.2, 0.3, 0.5]) + >>> target = B.tensor([False, True, False]) + >>> retrieval_reciprocal_rank(preds, target) + tensor(0.5000) + """ + preds, target = _check_retrieval_functional_inputs(preds, target) + + if not target.sum(): + return tensor(0.0, device=preds.device) + + target = target[B.argsort(preds, dim=-1, descending=True)] + position = B.nonzero(target).view(-1) + res = 1.0 / (position[0] + 1.0) + return res diff --git a/RE/paddlemetric/src/paddlemetrics/functional/self_supervised.py b/RE/paddlemetric/src/paddlemetrics/functional/self_supervised.py new file mode 100644 index 00000000..9af407aa --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/self_supervised.py @@ -0,0 +1,57 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from warnings import warn + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.pairwise import pairwise_cosine_similarity, pairwise_linear_similarity + + +def embedding_similarity( + batch: Tensor, similarity: str = "cosine", reduction: str = "none", zero_diagonal: bool = True +) -> Tensor: + """Computes representation similarity. + + Example: + >>> from paddlemetrics.functional import embedding_similarity + >>> embeddings = B.tensor([[1., 2., 3., 4.], [1., 2., 3., 4.], [4., 5., 6., 7.]]) + >>> embedding_similarity(embeddings) + tensor([[0.0000, 1.0000, 0.9759], + [1.0000, 0.0000, 0.9759], + [0.9759, 0.9759, 0.0000]]) + + Args: + batch: (batch, dim) + similarity: 'dot' or 'cosine' + reduction: 'none', 'sum', 'mean' (all along dim -1) + zero_diagonal: if True, the diagonals are set to zero + + Return: + A square matrix (batch, batch) with the similarity scores between all elements + If sum or mean are used, then returns (b, 1) with the reduced value for each row + + .. deprecated:: v0.6 + Use :func:`paddlemetrics.functional.pairwise_cosine_similarity` when `similarity='cosine'` + else use :func:`paddlemetrics.functional.pairwise_euclidean_distance`. Will be removed in v0.7. + """ + warn( + "Function `embedding_similarity` was deprecated v0.6 and will be removed in v0.7." 
+ " Use `paddlemetrics.functional.pairwise_cosine_similarity` instead when argument" + " similarity='cosine' else use `paddlemetrics.functional.pairwise_linear_similarity", + DeprecationWarning, + ) + if similarity == "cosine": + return pairwise_cosine_similarity(batch, reduction=reduction, zero_diagonal=zero_diagonal) + return pairwise_linear_similarity(batch, reduction=reduction, zero_diagonal=zero_diagonal) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/text/__init__.py b/RE/paddlemetric/src/paddlemetrics/functional/text/__init__.py new file mode 100644 index 00000000..97170840 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/text/__init__.py @@ -0,0 +1,17 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddlemetrics.functional.text.bleu import bleu_score # noqa: F401 +from paddlemetrics.functional.text.sacre_bleu import sacre_bleu_score # noqa: F401 +from paddlemetrics.functional.text.wer import wer # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/functional/text/bert.py b/RE/paddlemetric/src/paddlemetrics/functional/text/bert.py new file mode 100644 index 00000000..168be6ee --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/text/bert.py @@ -0,0 +1,650 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import csv +import math +import urllib +import warnings +from collections import Counter, defaultdict +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, nn +from paddleext.torchapi.data import DataLoader, Dataset + +from paddlemetrics.utilities.imports import _TQDM_AVAILABLE, _TRANSFORMERS_AVAILABLE + +if _TRANSFORMERS_AVAILABLE: + from transformers import AutoModel, AutoTokenizer + +if _TQDM_AVAILABLE: + import tqdm + + +def _preprocess_text( + text: List[str], + tokenizer: Any, + max_length: int = 512, + truncation: bool = True, + sort_according_length: bool = True, + own_tokenizer: bool = False, +) -> Dict[str, Tensor]: + """Default text pre-processing function using `transformers` `AutoTokenizer` instance. + + Args: + text: + An iterable of sentences. + tokenizer: + Either `AutoTokenizer` instance from `transformers` package, or a user's own tokenizer. + max_length: + A maximum sequence length. + truncation: + An indication of whether tokenized sequences should be padded only to the length of the longest sequence. 
+ sort_according_length: + An indication of whether tokenized sequences should be sorted from shortest to longest. This is appropriate + to do for leveraging dynamic padding during embedding calculation and thereby to hasten inference. + own_tokenizer: + An indication of whether a non-default user's own tokenizer is used. + + Return: + A dictionary of tokenized sentences including input_ids and attention_mask. + + Raises: + BaseException: + If a tokenization with a user's own tokenizer is not successful. + """ + if not own_tokenizer: + tokenized_data = tokenizer( + text, padding="max_length", max_length=max_length, truncation=truncation, return_tensors="pt" + ) + else: + try: + tokenized_data = tokenizer(text, max_length) + except BaseException as e: + raise BaseException(f"Tokenization was not successful: {e}") + + input_ids, attention_mask = ( + _sort_data_according_length(tokenized_data["input_ids"], tokenized_data["attention_mask"]) + if sort_according_length + else (tokenized_data["input_ids"], tokenized_data["attention_mask"]) + ) + return {"input_ids": input_ids, "attention_mask": attention_mask} + + +def _process_attention_mask_for_special_tokens(attention_mask: Tensor) -> Tensor: + """Process attention mask to be zero for special [CLS] and [SEP] tokens as they're not included in a + calculation for BERT score. + + Args: + attention_mask: An attention mask to be returned, for example, by a `transformers` tokenizer. + + Return: + A processed attention mask. + """ + # Make attention_mask zero for [CLS] token + attention_mask[:, 0] = 0 + # Make attention_mask zero for [SEP] token + sep_token_position = (attention_mask - 0.1).cumsum(-1).argmax(-1) + attention_mask[B.arange(attention_mask.size(0)).long(), sep_token_position] = 0 + return attention_mask + + +def _sort_data_according_length(input_ids: Tensor, attention_mask: Tensor) -> Tuple[Tensor, Tensor]: + """Sort tokenized sentence from the shortest to the longest one.""" + sorted_indices = attention_mask.sum(1).argsort() + input_ids = input_ids[sorted_indices] + attention_mask = attention_mask[sorted_indices] + return input_ids, attention_mask + + +def _input_data_collator( + batch: Dict[str, Tensor], device: Optional[Union[str, B.device]] = None +) -> Dict[str, Tensor]: + """Helper function that trims model inputs to the longest sequence within the batch and put the input on the + proper device.""" + max_len = int(batch["attention_mask"].sum(1).max().item()) + input_ids = batch["input_ids"][:, :max_len].to(device) + attention_mask = batch["attention_mask"][:, :max_len].to(device) + batch.update({"input_ids": input_ids, "attention_mask": attention_mask}) + return batch + + +def _output_data_collator(model_output: Tensor, attention_mask: Tensor, target_len: int) -> Tuple[Tensor, Tensor]: + """Helper function that pads the model output and attention mask to the target length.""" + zeros_shape = list(model_output.shape) + zeros_shape[2] = target_len - zeros_shape[2] + model_output = B.cat( + [model_output, B.zeros(zeros_shape, dtype=model_output.dtype).to(model_output.device)], dim=2 + ) + zeros = B.zeros(zeros_shape[0], zeros_shape[2], dtype=attention_mask.dtype).to(attention_mask.device) + attention_mask = B.cat([attention_mask, zeros], dim=1) + return model_output, attention_mask + + +class TextDataset(Dataset): + """PyTorch dataset class for storing tokenized sentences and other properties used for BERT score + calculation.""" + + def __init__( + self, + text: List[str], + tokenizer: Any, + max_length: int = 512, + 
preprocess_text_fn: Callable[[List[str], Any, int], Dict[str, Tensor]] = _preprocess_text, + idf: bool = False, + tokens_idf: Optional[Dict[int, float]] = None, + ) -> None: + """ + Args: + text: + An iterable of sentences. + tokenizer: + `AutoTokenizer` instance from `transformers` package. + max_length: + A maximum sequence length. + preprocess_text_fn: + A function used for processing the input sentences. + idf: + An indication of whether calculate token inverse document frequencies to weight the model embeddings. + tokens_idf: + Inverse document frequencies (these should be calculated on reference sentences). + """ + self.text = preprocess_text_fn(text, tokenizer, max_length) + self.max_length = self.text["input_ids"].shape[1] + self.num_sentences = len(text) + self.idf = idf + self.tokens_idf = {} + if idf: + self.tokens_idf = tokens_idf if tokens_idf is not None else self._get_tokens_idf() + + def __getitem__(self, idx: int) -> Dict[str, Tensor]: + input_ids = self.text["input_ids"][idx, :] + attention_mask = self.text["attention_mask"][idx, :] + inputs_dict = {"input_ids": input_ids, "attention_mask": attention_mask} + if self.idf: + input_ids_idf = B.tensor([self.tokens_idf[input_idx] for input_idx in input_ids.tolist()]) + inputs_dict["input_ids_idf"] = input_ids_idf + return inputs_dict + + def __len__(self) -> int: + return self.num_sentences + + def _get_tokens_idf(self) -> Dict[int, float]: + """Calculate token inverse document frequences. + + Return: + A python dictionary containing inverse document frequences for token ids. + """ + token_counter: Counter = Counter() + for tokens in map(self._set_of_tokens, self.text["input_ids"]): + token_counter.update(tokens) + + tokens_idf: Dict[int, float] = defaultdict(self._get_tokens_idf_default_value) + tokens_idf.update( + {idx: math.log((self.num_sentences + 1) / (occurrence + 1)) for idx, occurrence in token_counter.items()} + ) + return tokens_idf + + def _get_tokens_idf_default_value(self) -> float: + """Helper function that ensures `defaultdict` to be pickled.""" + return math.log((self.num_sentences + 1) / 1) + + @staticmethod + def _set_of_tokens(input_ids: Tensor) -> Set: + """Return set of tokens from the `input_ids` `B.Tensor`.""" + return set(input_ids.tolist()) + + +class TokenizedDataset(TextDataset): + """The child class of `TextDataset` class used with already tokenized data.""" + + def __init__( + self, + input_ids: Tensor, + attention_mask: Tensor, + idf: bool = False, + tokens_idf: Optional[Dict[int, float]] = None, + ) -> None: + """ + Args: + input_ids: + Input ids (`B.Tensor`). + attention_mask: + Attention mask (`B.Tensor`). + idf: + An indication of whether calculate token inverse document frequencies to weight the model embeddings. + tokens_idf: + Inverse document frequencies (these should be calculated on reference sentences). 
+ """ + self.text = dict(zip(["input_ids", "attention_mask"], _sort_data_according_length(input_ids, attention_mask))) + self.text = _input_data_collator(self.text) + self.num_sentences = len(self.text["input_ids"]) + self.max_length = self.text["input_ids"].shape[1] + self.idf = idf + self.tokens_idf = {} + if idf: + self.tokens_idf = tokens_idf if tokens_idf is not None else self._get_tokens_idf() + + +def _get_progress_bar(dataloader: DataLoader, verbose: bool = False) -> Union[DataLoader, "tqdm.auto.tqdm"]: + """Helper function returning either the dataloader itself when `verbose = False`, or it wraps the dataloader with + `tqdm.auto.tqdm`, when `verbose = True` to display a progress bar during the embbeddings calculation.""" + return tqdm.auto.tqdm(dataloader) if verbose else dataloader + + +def _check_shape_of_model_output(output: Tensor, input_ids: Tensor) -> None: + """Check if the shape of the user's own model output.""" + bs, seq_len = input_ids.shape[:2] + invalid_out_shape = len(output.shape) != 3 or output.shape[0] != bs or output.shape[1] != seq_len + if invalid_out_shape: + raise ValueError( + "The model output must be `B.Tensor` of a shape `[batch_size, seq_len, model_dim]` " + f"i.e. [{bs}, {seq_len}. , `model_dim`], but got {output.shape}." + ) + + +def _get_embeddings_and_idf_scale( + dataloader: DataLoader, + target_len: int, + model: nn.Module, + device: Optional[Union[str, B.device]] = None, + num_layers: Optional[int] = None, + all_layers: bool = False, + idf: bool = False, + verbose: bool = False, + user_forward_fn: Callable[[nn.Module, Dict[str, Tensor]], Tensor] = None, +) -> Tuple[Tensor, Tensor]: + """Calculate sentence embeddings and the inverse-document-frequence scaling factor. + Args: + dataloader: + `B.utils.data.DataLoader` instance. + target_len: + A length of the longest sequence in the data. Used for padding the model output. + model: + BERT model. + device: + A device to be used for calculation. + num_layers: + The layer of representation to use. + all_layers: + An indication whether representation from all model layers should be used for BERTScore. + idf: + An Indication whether normalization using inverse document frequencies should be used. + verbose: + An indication of whether a progress bar to be displayed during the embeddings calculation. + user_forward_fn: + A user's own forward function used in a combination with `user_model`. This function must take `user_model` + and a python dictionary of containing `"input_ids"` and `"attention_mask"` represented by `B.Tensor` + as an input and return the model's output represented by the single `B.Tensor`. + + Return: + A tuple of B.Tensors containing the model's embeddings and the normalized tokens IDF. + When `idf = False`, tokens IDF is not calculated, and a matrix of mean weights is returned instead. + For a single sentence, `mean_weight = 1/seq_len`, where `seq_len` is a sum over the corresponding + `attention_mask`. + + Raises: + ValueError: + If `all_layers = True` and a model, which is not from the `transformers` package, is used. 
+ """ + embeddings_list: List[Tensor] = [] + idf_scale_list: List[Tensor] = [] + for batch in _get_progress_bar(dataloader, verbose): + with B.no_grad(): + batch = _input_data_collator(batch, device) + # Output shape: batch_size x num_layers OR 1 x sequence_length x bert_dim + if not all_layers: + if not user_forward_fn: + out = model(batch["input_ids"], batch["attention_mask"], output_hidden_states=True) + out = out.hidden_states[num_layers if num_layers is not None else -1] + else: + out = user_forward_fn(model, batch) + _check_shape_of_model_output(out, batch["input_ids"]) + out = out.unsqueeze(1) + else: + if user_forward_fn: + raise ValueError( + "The option `all_layers=True` can be used only with default `transformers` models." + ) + out = model(batch["input_ids"], batch["attention_mask"], output_hidden_states=True) + out = B.cat([o.unsqueeze(1) for o in out.hidden_states], dim=1) + + out /= out.norm(dim=-1).unsqueeze(-1) # normalize embeddings + out, attention_mask = _output_data_collator(out, batch["attention_mask"], target_len) + processed_attention_mask = _process_attention_mask_for_special_tokens(attention_mask) + # Multiply embeddings with attention_mask (b=batch_size, l=num_layers, s=seq_len, d=emb_dim) + out = B.einsum("blsd, bs -> blsd", out, processed_attention_mask) + embeddings_list.append(out.cpu()) + + # Calculate weighted (w.r.t. sentence length) input_ids IDF matrix + input_ids_idf = ( + batch["input_ids_idf"] * processed_attention_mask if idf else processed_attention_mask.type(out.dtype) + ) + input_ids_idf /= input_ids_idf.sum(-1, keepdim=True) + idf_scale_list.append(input_ids_idf) + + embeddings = B.cat(embeddings_list) + idf_scale = B.cat(idf_scale_list) + + return embeddings, idf_scale + + +def _get_scaled_precision_or_recall(cos_sim: Tensor, metric: str, idf_scale: Tensor) -> Tensor: + """Helper function that calculates precision or recall, transpose it and scale it with idf_scale factor.""" + dim = 3 if metric == "precision" else 2 + res = cos_sim.max(dim=dim).values + res = B.einsum("bls, bs -> bls", res, idf_scale).sum(-1) + # We transpose the results and squeeze if possible to match the format of the original BERTScore implementation + res = res.transpose(0, 1).squeeze() + return res + + +def _get_precision_recall_f1( + pred_embeddings: Tensor, ref_embeddings: Tensor, pred_idf_scale: Tensor, ref_idf_scale: Tensor +) -> Tuple[Tensor, Tensor, Tensor]: + """Calculate precision, recall and F1 score over candidate and reference sentences. + + Args: + pred_embeddings: Embeddings of candidate sentenecs. + ref_embeddings: Embeddings of reference sentences. + pred_idf_scale: An IDF scale factor for candidate sentences. + ref_idf_scale: An IDF scale factor for reference sentences. + + Return: + Tensors containing precision, recall and F1 score, respectively. 
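+        Precision takes, for each candidate token, its highest cosine similarity over the reference
+        tokens (IDF-weighted and summed per sentence); recall does the converse for each reference
+        token; F1 is their harmonic mean, with undefined entries (precision + recall = 0) set to 0.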
+ """ + # Dimensions: b = batch_size, l = num_layers, p = predictions_seq_len, r = references_seq_len, d = bert_dim + cos_sim = B.einsum("blpd, blrd -> blpr", pred_embeddings, ref_embeddings) + # Final metrics shape = (batch_size * num_layers | batch_size) + precision = _get_scaled_precision_or_recall(cos_sim, "precision", pred_idf_scale) + recall = _get_scaled_precision_or_recall(cos_sim, "recall", ref_idf_scale) + + f1_score = 2 * precision * recall / (precision + recall) + f1_score = f1_score.masked_fill(B.isnan(f1_score), 0.0) + + return precision, recall, f1_score + + +def _get_hash(model_name_or_path: Optional[str] = None, num_layers: Optional[int] = None, idf: bool = False) -> str: + """Compute `BERT_score`_ (copied and adjusted)""" + msg = f"{model_name_or_path}_L{num_layers}{'_idf' if idf else '_no-idf'}" + return msg + + +def _read_csv_from_local_file(baseline_path: str) -> Tensor: + """Helper function which reads baseline the csv file from the local file. + + This method implemented to avoid `pandas` dependency. + """ + with open(baseline_path) as fname: + csv_file = csv.reader(fname) + baseline_list = [[float(item) for item in row] for idx, row in enumerate(csv_file) if idx > 0] + baseline = B.tensor(baseline_list)[:, 1:] + return baseline + + +def _read_csv_from_url(baseline_url: str) -> Tensor: + """Helper function which reads the baseline csv file from URL. + + This method is implemented to avoid `pandas` dependency. + """ + with urllib.request.urlopen(baseline_url) as http_request: # type: ignore + baseline_list = [ + [float(item) for item in row.strip().decode("utf-8").split(",")] + for idx, row in enumerate(http_request) + if idx > 0 + ] + baseline = B.tensor(baseline_list)[:, 1:] + return baseline + + +def _load_baseline( + lang: str = "en", + model_name_or_path: Optional[str] = None, + baseline_path: Optional[str] = None, + baseline_url: Optional[str] = None, +) -> Optional[Tensor]: + """Load a CSV file with the baseline values used for rescaling.""" + if baseline_path: + baseline: Optional[Tensor] = _read_csv_from_local_file(baseline_path) + elif baseline_url: + baseline = _read_csv_from_url(baseline_url) + # Read default baseline from the original `bert-score` package https://github.com/Tiiiger/bert_score + elif lang and model_name_or_path: + _URL_BASE = "https://raw.githubusercontent.com/Tiiiger/bert_score/master/bert_score/rescale_baseline" + baseline_url = f"{_URL_BASE}/{lang}/{model_name_or_path}.tsv" + baseline = _read_csv_from_url(baseline_url) + else: + baseline = None + warnings.warn("Baseline was not successfully loaded. 
No baseline is going to be used.") + + return baseline + + +def _rescale_metrics_with_baseline( + precision: Tensor, + recall: Tensor, + f1_score: Tensor, + baseline: Tensor, + num_layers: Optional[int] = None, + all_layers: bool = False, +) -> Tuple[Tensor, Tensor, Tensor]: + """Rescale the computed metrics with the pre-computed baseline.""" + if num_layers is None and all_layers is False: + num_layers = -1 + all_metrics = B.stack([precision, recall, f1_score], dim=-1) + baseline_scale = baseline.unsqueeze(1) if all_layers else baseline[num_layers] + all_metrics = (all_metrics - baseline_scale) / (1 - baseline_scale) + + return all_metrics[..., 0], all_metrics[..., 1], all_metrics[..., 2] + + +def bert_score( + predictions: Union[List[str], Dict[str, Tensor]], + references: Union[List[str], Dict[str, Tensor]], + model_name_or_path: Optional[str] = None, + num_layers: Optional[int] = None, + all_layers: bool = False, + model: Optional[nn.Module] = None, + user_tokenizer: Any = None, + user_forward_fn: Callable[[nn.Module, Dict[str, Tensor]], Tensor] = None, + verbose: bool = False, + idf: bool = False, + device: Optional[Union[str, B.device]] = None, + max_length: int = 512, + batch_size: int = 64, + num_threads: int = 4, + return_hash: bool = False, + lang: str = "en", + rescale_with_baseline: bool = False, + baseline_path: Optional[str] = None, + baseline_url: Optional[str] = None, +) -> Dict[str, Union[List[float], str]]: + """`Bert_score Evaluating Text Generation`_ leverages the pre-trained contextual embeddings from BERT and + matches words in candidate and reference sentences by cosine similarity. It has been shown to correlate with + human judgment on sentence-level and system-level evaluation. Moreover, BERTScore computes precision, recall, + and F1 measure, which can be useful for evaluating different language generation tasks. + + This implemenation follows the original implementation from `BERT_score`_ + + Args: + predictions: + Either an iterable of predicted sentences or a `Dict[str, B.Tensor]` containing `input_ids` and + `attention_mask` `B.Tensor`. + references: + Either an iterable of target sentences or a `Dict[str, B.Tensor]` containing `input_ids` and + `attention_mask` `B.Tensor`. + model_name_or_path: + A name or a model path used to load `transformers` pretrained model. + num_layers: + A layer of representation to use. + all_layers: + An indication of whether the representation from all model's layers should be used. + If `all_layers = True`, the argument `num_layers` is ignored. + model: + A user's own model. Must be of `nn.Module` instance. + user_tokenizer: + A user's own tokenizer used with the own model. This must be an instance with the `__call__` method. + This method must take an iterable of sentences (`List[str]`) and must return a python dictionary + containing `"input_ids"` and `"attention_mask"` represented by `B.Tensor`. It is up to the user's model + of whether `"input_ids"` is a `B.Tensor` of input ids or embedding vectors. + This tokenizer must prepend an equivalent of `[CLS]` token and append an equivalent of `[SEP]` token + as `transformers` tokenizer does. + user_forward_fn: + A user's own forward function used in a combination with `user_model`. This function must take `user_model` + and a python dictionary of containing `"input_ids"` and `"attention_mask"` represented by `B.Tensor` + as an input and return the model's output represented by the single `B.Tensor`. 
+ verbose: + An indication of whether a progress bar to be displayed during the embeddings calculation. + idf: + An indication of whether normalization using inverse document frequencies should be used. + device: + A device to be used for calculation. + max_length: + A maximum length of input sequences. Sequences longer than `max_length` are to be trimmed. + batch_size: + A batch size used for model processing. + num_threads: + A number of threads to use for a dataloader. + return_hash: + An indication of whether the correspodning `hash_code` should be returned. + lang: + A language of input sentences. It is used when the scores are rescaled with a baseline. + rescale_with_baseline: + An indication of whether bertscore should be rescaled with a pre-computed baseline. + When a pretrained model from `transformers` model is used, the corresponding baseline is downloaded + from the original `bert-score` package from `BERT_score`_ if available. + In other cases, please specify a path to the baseline csv/tsv file, which must follow the formatting + of the files from `BERT_score`_ + baseline_path: + A path to the user's own local csv/tsv file with the baseline scale. + baseline_url: + A url path to the user's own csv/tsv file with the baseline scale. + + Returns: + Python dictionary containing the keys `precision`, `recall` and `f1` with corresponding values. + + Raises: + ValueError: + If `len(predictions) != len(references)`. + ValueError: + If `tqdm` package is required and not installed. + ValueError: + If `transformers` package is required and not installed. + ValueError: + If `num_layer` is larger than the number of the model layers. + ValueError: + If invalid input is provided. + + Example: + >>> predictions = ["hello there", "general kenobi"] + >>> references = ["hello there", "master kenobi"] + >>> bert_score(predictions=predictions, references=references, lang="en") # doctest: +SKIP + {'precision': [0.99..., 0.99...], + 'recall': [0.99..., 0.99...], + 'f1': [0.99..., 0.99...]} + """ + if len(predictions) != len(references): + raise ValueError("Number of predicted and reference sententes must be the same!") + + if verbose and (not _TQDM_AVAILABLE): + raise ValueError( + "An argument `verbose = True` requires `tqdm` package be installed. Install with `pip install tqdm`." + ) + + if model is None: + if not _TRANSFORMERS_AVAILABLE: + raise ValueError( + "`bert_score` metric with default models requires `transformers` package be installed. " + "Either install with `pip install transformers>=4.0` or `pip install paddlemetrics[text]`" + ) + tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) + model = AutoModel.from_pretrained(model_name_or_path) + else: + tokenizer = user_tokenizer + model.eval() + model.to(device) + + try: + if num_layers and num_layers > model.config.num_hidden_layers: # type: ignore + raise ValueError( + f"num_layers={num_layers} is forbidden for {model_name_or_path}. 
" # type: ignore + f"Please use num_layers <= {model.config.num_hidden_layers}" # type: ignore + ) + except AttributeError: + warnings.warn("It was not possible to retrieve the parameter `num_layers` from the model specification.") + + _are_empty_lists = all(isinstance(text, list) and len(text) == 0 for text in (predictions, references)) + _are_valid_lists = all( + isinstance(text, list) and len(text) > 0 and isinstance(text[0], str) for text in (predictions, references) + ) + _are_valid_tensors = all( + isinstance(text, dict) and isinstance(text["input_ids"], Tensor) for text in (predictions, references) + ) + if _are_empty_lists: + warnings.warn("Predictions and references are empty.") + output_dict: Dict[str, Union[List[float], str]] = { + "precision": [0.0], + "recall": [0.0], + "f1": [0.0], + } + if return_hash: + output_dict.update({"hash": _get_hash(model_name_or_path, num_layers, idf)}) + return output_dict + + # Load baselines if needed + baseline = _load_baseline(lang, model_name_or_path, baseline_path, baseline_url) if rescale_with_baseline else None + + # We ignore mypy typing below as the proper typing is ensured by conditions above, only mypy cannot infer that. + if _are_valid_lists: + ref_dataset = TextDataset(references, tokenizer, max_length, idf=idf) # type: ignore + pred_dataset = TextDataset( + predictions, # type: ignore + tokenizer, + max_length, + idf=idf, + tokens_idf=ref_dataset.tokens_idf, + ) + elif _are_valid_tensors: + ref_dataset = TokenizedDataset(**references, idf=idf) # type: ignore + pred_dataset = TokenizedDataset(**predictions, idf=idf, tokens_idf=ref_dataset.tokens_idf) # type: ignore + else: + raise ValueError("Invalid input provided.") + + ref_loader = DataLoader(ref_dataset, batch_size=batch_size, num_workers=num_threads) + pred_loader = DataLoader(pred_dataset, batch_size=batch_size, num_workers=num_threads) + + ref_embeddings, ref_idf_scale = _get_embeddings_and_idf_scale( + ref_loader, ref_dataset.max_length, model, device, num_layers, all_layers, idf, verbose, user_forward_fn + ) + pred_embeddings, pred_idf_scale = _get_embeddings_and_idf_scale( + pred_loader, pred_dataset.max_length, model, device, num_layers, all_layers, idf, verbose, user_forward_fn + ) + + precision, recall, f1_score = _get_precision_recall_f1( + pred_embeddings, ref_embeddings, pred_idf_scale, ref_idf_scale + ) + + if baseline is not None: + precision, recall, f1_score = _rescale_metrics_with_baseline( + precision, recall, f1_score, baseline, num_layers, all_layers + ) + + output_dict = { + "precision": precision.tolist(), + "recall": recall.tolist(), + "f1": f1_score.tolist(), + } + if return_hash: + output_dict.update({"hash": _get_hash(model_name_or_path, num_layers, idf)}) + return output_dict diff --git a/RE/paddlemetric/src/paddlemetrics/functional/text/bleu.py b/RE/paddlemetric/src/paddlemetrics/functional/text/bleu.py new file mode 100644 index 00000000..4d00946b --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/text/bleu.py @@ -0,0 +1,171 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# referenced from +# Library Name: torchtext +# Authors: torchtext authors and @sluks +# Date: 2020-07-18 +# Link: https://pyB.org/text/_modules/torchtext/data/metrics.html#bleu_score +from collections import Counter +from typing import Sequence, Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + + +def _count_ngram(ngram_input_list: Sequence[str], n_gram: int) -> Counter: + """Counting how many times each word appears in a given text with ngram. + + Args: + ngram_input_list: A list of translated text or reference texts + n_gram: gram value ranged 1 to 4 + + Return: + ngram_counter: a collections.Counter object of ngram + """ + + ngram_counter: Counter = Counter() + + for i in range(1, n_gram + 1): + for j in range(len(ngram_input_list) - i + 1): + ngram_key = tuple(ngram_input_list[j : (i + j)]) + ngram_counter[ngram_key] += 1 + + return ngram_counter + + +def _bleu_score_update( + reference_corpus: Sequence[Sequence[Sequence[str]]], + translate_corpus: Sequence[Sequence[str]], + numerator: Tensor, + denominator: Tensor, + trans_len: Tensor, + ref_len: Tensor, + n_gram: int = 4, +) -> Tuple[Tensor, Tensor]: + """Updates and returns variables required to compute the BLEU score. + + Args: + reference_corpus: An iterable of iterables of reference corpus + translate_corpus: An iterable of machine translated corpus + numerator: Numerator of precision score (true positives) + denominator: Denominator of precision score (true positives + false positives) + trans_len: count of words in a candidate translation + ref_len: count of words in a reference translation + n_gram: gram value ranged 1 to 4 + """ + + for (translation, references) in zip(translate_corpus, reference_corpus): + trans_len += len(translation) + ref_len_list = [len(ref) for ref in references] + ref_len_diff = [abs(len(translation) - x) for x in ref_len_list] + ref_len += ref_len_list[ref_len_diff.index(min(ref_len_diff))] + translation_counter: Counter = _count_ngram(translation, n_gram) + reference_counter: Counter = Counter() + + for ref in references: + reference_counter |= _count_ngram(ref, n_gram) + + ngram_counter_clip = translation_counter & reference_counter + + for counter_clip in ngram_counter_clip: + numerator[len(counter_clip) - 1] += ngram_counter_clip[counter_clip] + + for counter in translation_counter: + denominator[len(counter) - 1] += translation_counter[counter] + + return trans_len, ref_len + + +def _bleu_score_compute( + trans_len: Tensor, ref_len: Tensor, numerator: Tensor, denominator: Tensor, n_gram: int = 4, smooth: bool = False +) -> Tensor: + """Computes the BLEU score. 
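+
+    Illustrative hand computation (an editorial sketch, not part of the original code): for the example
+    used in ``bleu_score`` below, the candidate ``"the cat is on the mat"`` scored against the references
+    ``"there is a cat on the mat"`` and ``"a cat is on the mat"`` gives clipped n-gram precisions of
+    5/6, 4/5, 3/4 and 2/3 for n = 1..4. The closest reference length equals the candidate length, so the
+    brevity penalty is 1.0 and the score is the geometric mean of the precisions:
+
+    >>> (5 / 6 * 4 / 5 * 3 / 4 * 2 / 3) ** 0.25  # doctest: +SKIP
+    0.7598...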
+ + Args: + trans_len: count of words in a candidate translation + ref_len: count of words in a reference translation + numerator: Numerator of precision score (true positives) + denominator: Denominator of precision score (true positives + false positives) + n_gram: gram value ranged 1 to 4 + smooth: Whether or not to apply smoothing + """ + device = numerator.device + if min(numerator) == 0.0: + return tensor(0.0, device=device) + + if smooth: + precision_scores = B.div( + B.add(numerator, B.ones(n_gram, device=device)), + B.add(denominator, B.ones(n_gram, device=device)), + ) + precision_scores[0] = numerator[0] / denominator[0] + else: + precision_scores = numerator / denominator + + log_precision_scores = tensor([1.0 / n_gram] * n_gram, device=device) * B.log(precision_scores) + geometric_mean = B.exp(B.sum(log_precision_scores)) + brevity_penalty = tensor(1.0, device=device) if trans_len > ref_len else B.exp(1 - (ref_len / trans_len)) + bleu = brevity_penalty * geometric_mean + + return bleu + + +def bleu_score( + reference_corpus: Sequence[Sequence[Sequence[str]]], + translate_corpus: Sequence[Sequence[str]], + n_gram: int = 4, + smooth: bool = False, +) -> Tensor: + """Calculate `BLEU score`_ of machine translated text with one or more references. + + Args: + reference_corpus: + An iterable of iterables of reference corpus + translate_corpus: + An iterable of machine translated corpus + n_gram: + Gram value ranged from 1 to 4 (Default 4) + smooth: + Whether or not to apply smoothing – see [2] + + Return: + Tensor with BLEU Score + + Example: + >>> from paddlemetrics.functional import bleu_score + >>> translate_corpus = ['the cat is on the mat'.split()] + >>> reference_corpus = [['there is a cat on the mat'.split(), 'a cat is on the mat'.split()]] + >>> bleu_score(reference_corpus, translate_corpus) + tensor(0.7598) + + References: + [1] BLEU: a Method for Automatic Evaluation of Machine Translation by Papineni, + Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu `BLEU`_ + + [2] Automatic Evaluation of Machine Translation Quality Using Longest Common Subsequence + and Skip-Bigram Statistics by Chin-Yew Lin and Franz Josef Och `Machine Translation Evolution`_ + """ + + if len(translate_corpus) != len(reference_corpus): + raise ValueError(f"Corpus has different size {len(translate_corpus)} != {len(reference_corpus)}") + numerator = B.zeros(n_gram) + denominator = B.zeros(n_gram) + trans_len = tensor(0, dtype=B.float) + ref_len = tensor(0, dtype=B.float) + + trans_len, ref_len = _bleu_score_update( + reference_corpus, translate_corpus, numerator, denominator, trans_len, ref_len, n_gram + ) + + return _bleu_score_compute(trans_len, ref_len, numerator, denominator, n_gram, smooth) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/text/rouge.py b/RE/paddlemetric/src/paddlemetrics/functional/text/rouge.py new file mode 100644 index 00000000..e83c00d0 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/text/rouge.py @@ -0,0 +1,325 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import re +from collections import Counter +from typing import Any, Dict, List, Optional, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.imports import _NLTK_AVAILABLE + +ALLOWED_ROUGE_KEYS: Dict[str, Union[int, str]] = { + "rouge1": 1, + "rouge2": 2, + "rouge3": 3, + "rouge4": 4, + "rouge5": 5, + "rouge6": 6, + "rouge7": 7, + "rouge8": 8, + "rouge9": 9, + "rougeL": "L", + "rougeLsum": "Lsum", +} + + +def _add_newline_to_end_of_each_sentence(x: str) -> str: + """This was added to get rougeLsum scores matching published rougeL scores for BART and PEGASUS.""" + if not _NLTK_AVAILABLE: + raise ValueError("ROUGE-Lsum calculation requires that nltk is installed. Use `pip install nltk`.") + import nltk + + nltk.download("punkt", quiet=True, force=False) + + re.sub("", "", x) # remove pegasus newline char + return "\n".join(nltk.sent_tokenize(x)) + + +def _compute_metrics(hits_or_lcs: int, pred_len: int, target_len: int) -> Dict[str, Tensor]: + """This computes precision, recall and F1 score based on hits/lcs, and the length of lists of tokenizer + predicted and target sentences. + + Args: + hits_or_lcs: + A number of matches or a length of the longest common subsequence. + pred_len: + A length of a tokenized predicted sentence. + target_len: + A length of a tokenized target sentence. + """ + precision = hits_or_lcs / pred_len + recall = hits_or_lcs / target_len + if precision == recall == 0.0: + return dict(precision=tensor(0.0), recall=tensor(0.0), fmeasure=tensor(0.0)) + + fmeasure = 2 * precision * recall / (precision + recall) + return dict(precision=tensor(precision), recall=tensor(recall), fmeasure=tensor(fmeasure)) + + +def _lcs(pred_tokens: List[str], target_tokens: List[str]) -> int: + """Common DP algorithm to compute the length of the longest common subsequence. + + Args: + pred_tokens: + A tokenized predicted sentence. + target_toknes: + A tokenized target sentence. + """ + LCS = [[0] * (len(pred_tokens) + 1) for _ in range(len(target_tokens) + 1)] + for i in range(1, len(target_tokens) + 1): + for j in range(1, len(pred_tokens) + 1): + if target_tokens[i - 1] == pred_tokens[j - 1]: + LCS[i][j] = LCS[i - 1][j - 1] + 1 + else: + LCS[i][j] = max(LCS[i - 1][j], LCS[i][j - 1]) + return LCS[-1][-1] + + +def _normalize_and_tokenize_text(text: str, stemmer: Optional[Any] = None) -> List[str]: + """Rouge score should be calculated only over lowercased words and digits. Optionally, Porter stemmer can be + used to strip word suffixes to improve matching. The text normalization follows the implemantion from `Rouge + score_Text Normalizition`_ + + Args: + text: + An input sentence. + stemmer: + Porter stemmer instance to strip word suffixes to improve matching. + """ + # Replace any non-alpha-numeric characters with spaces. + text = re.sub(r"[^a-z0-9]+", " ", text.lower()) + + tokens = re.split(r"\s+", text) + if stemmer: + # Only stem words more than 3 characters long. + tokens = [stemmer.stem(x) if len(x) > 3 else x for x in tokens] + + # One final check to drop any empty or invalid tokens. + tokens = [x for x in tokens if (isinstance(x, str) and re.match(r"^[a-z0-9]+$", x))] + + return tokens + + +def _rouge_n_score(pred: List[str], target: List[str], n_gram: int) -> Dict[str, Tensor]: + """This computes precision, recall and F1 score for the Rouge-N metric. + + Args: + pred: + A predicted sentence. 
+ target: + A target sentence. + n_gram: + N-gram overlap. + """ + + def _create_ngrams(tokens: List[str], n: int) -> Counter: + ngrams: Counter = Counter() + for ngram in (tuple(tokens[i : i + n]) for i in range(len(tokens) - n + 1)): + ngrams[ngram] += 1 + return ngrams + + pred_ngrams, target_ngrams = _create_ngrams(pred, n_gram), _create_ngrams(target, n_gram) + pred_len, target_len = sum(pred_ngrams.values()), sum(target_ngrams.values()) + if 0 in (pred_len, target_len): + return dict(precision=tensor(0.0), recall=tensor(0.0), fmeasure=tensor(0.0)) + + # It is sufficient to take a set(pred_tokenized) for hits count as we consider intersenction of pred & target + hits = sum(min(pred_ngrams[w], target_ngrams[w]) for w in set(pred_ngrams)) + return _compute_metrics(hits, max(pred_len, 1), max(target_len, 1)) + + +def _rouge_l_score(pred: List[str], target: List[str]) -> Dict[str, Tensor]: + """This computes precision, recall and F1 score for the Rouge-L or Rouge-LSum metric. + + Args: + pred: + A predicted sentence. + target: + A target sentence. + """ + pred_len, target_len = len(pred), len(target) + if 0 in (pred_len, target_len): + return dict(precision=tensor(0.0), recall=tensor(0.0), fmeasure=tensor(0.0)) + + lcs = _lcs(pred, target) + return _compute_metrics(lcs, pred_len, target_len) + + +def _rouge_score_update( + preds: List[str], + targets: List[str], + rouge_keys_values: List[Union[int, str]], + stemmer: Optional[Any] = None, +) -> Dict[Union[int, str], List[Dict[str, Tensor]]]: + """Update the rouge score with the current set of predicted and target sentences. + + Args: + preds: + An iterable of predicted sentences. + targets: + An iterable of target sentences. + rouge_keys_values: + List of N-grams/'L'/'Lsum' arguments. + stemmer: + Porter stemmer instance to strip word suffixes to improve matching. 
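+
+    A per-key sketch (an editorial illustration, not part of the original docstring): each predicted and
+    target pair is scored with ``_rouge_n_score`` for integer keys and ``_rouge_l_score`` for ``'L'`` and
+    ``'Lsum'``, e.g. on two already tokenized sentences:
+
+    >>> _rouge_n_score("my name is john".split(), "is your name john".split(), n_gram=1)  # doctest: +SKIP
+    {'precision': tensor(0.7500), 'recall': tensor(0.7500), 'fmeasure': tensor(0.7500)}
+    >>> _rouge_l_score("my name is john".split(), "is your name john".split())  # doctest: +SKIP
+    {'precision': tensor(0.5000), 'recall': tensor(0.5000), 'fmeasure': tensor(0.5000)}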
+ + Example: + >>> targets = "Is your name John".split() + >>> preds = "My name is John".split() + >>> from pprint import pprint + >>> score = _rouge_score_update(preds, targets, rouge_keys_values=[1, 2, 3, 'L']) + >>> pprint(score) # doctest: +NORMALIZE_WHITESPACE +SKIP + {1: [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(1.), 'precision': tensor(1.), 'recall': tensor(1.)}], + 2: [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}], + 3: [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}], + 'L': [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}, + {'fmeasure': tensor(1.), 'precision': tensor(1.), 'recall': tensor(1.)}]} + """ + results: Dict[Union[int, str], List[Dict[str, Tensor]]] = {rouge_key: [] for rouge_key in rouge_keys_values} + for pred_raw, target_raw in zip(preds, targets): + pred = _normalize_and_tokenize_text(pred_raw, stemmer) + target = _normalize_and_tokenize_text(target_raw, stemmer) + + if "Lsum" in rouge_keys_values: + # rougeLsum expects "\n" separated sentences within a summary + pred_Lsum = _normalize_and_tokenize_text(_add_newline_to_end_of_each_sentence(pred_raw), stemmer) + target_Lsum = _normalize_and_tokenize_text(_add_newline_to_end_of_each_sentence(target_raw), stemmer) + + for rouge_key in rouge_keys_values: + if isinstance(rouge_key, int): + score = _rouge_n_score(pred, target, rouge_key) + else: + score = _rouge_l_score( + pred if rouge_key != "Lsum" else pred_Lsum, + target if rouge_key != "Lsum" else target_Lsum, + ) + results[rouge_key].append(score) + return results + + +def _rouge_score_compute(sentence_results: Dict[str, List[Tensor]]) -> Dict[str, Tensor]: + """Compute the combined ROUGE metric for all the input set of predicted and target sentences. + + Args: + sentence_results: + Rouge-N/Rouge-L/Rouge-LSum metrics calculated for single sentence. + """ + results: Dict[str, Tensor] = {} + # Obtain mean scores for individual rouge metrics + if sentence_results == {}: + return results + + for rouge_key, scores in sentence_results.items(): + results[rouge_key] = B.tensor(scores).mean() + + return results + + +def rouge_score( + preds: Union[str, List[str]], + targets: Union[str, List[str]], + use_stemmer: bool = False, + rouge_keys: Union[str, Tuple[str, ...]] = ("rouge1", "rouge2", "rougeL", "rougeLsum"), # type: ignore +) -> Dict[str, Tensor]: + """Calculate `Calculate Rouge Score`_ , used for automatic summarization. + + Args: + preds: + An iterable of predicted sentences. + targets: + An iterable of target sentences. + use_stemmer: + Use Porter stemmer to strip word suffixes to improve matching. + rouge_keys: + A list of rouge types to calculate. 
+ Keys that are allowed are ``rougeL``, ``rougeLsum``, and ``rouge1`` through ``rouge9``. + + Return: + Python dictionary of rouge scores for each input rouge key. + + Example: + >>> targets = "Is your name John".split() + >>> preds = "My name is John".split() + >>> from pprint import pprint + >>> pprint(rouge_score(preds, targets)) # doctest: +NORMALIZE_WHITESPACE +SKIP + {'rouge1_fmeasure': 0.25, + 'rouge1_precision': 0.25, + 'rouge1_recall': 0.25, + 'rouge2_fmeasure': 0.0, + 'rouge2_precision': 0.0, + 'rouge2_recall': 0.0, + 'rougeL_fmeasure': 0.25, + 'rougeL_precision': 0.25, + 'rougeL_recall': 0.25, + 'rougeLsum_fmeasure': 0.25, + 'rougeLsum_precision': 0.25, + 'rougeLsum_recall': 0.25} + + Raises: + ValueError: + If the python package ``nltk`` is not installed. + ValueError: + If any of the ``rouge_keys`` does not belong to the allowed set of keys. + + References: + [1] ROUGE: A Package for Automatic Evaluation of Summaries by Chin-Yew Lin. https://aclanthology.org/W04-1013/ + """ + + if use_stemmer: + if not _NLTK_AVAILABLE: + raise ValueError("Stemmer requires that nltk is installed. Use `pip install nltk`.") + import nltk + + stemmer = nltk.stem.porter.PorterStemmer() if use_stemmer else None + + if not isinstance(rouge_keys, tuple): + rouge_keys = tuple([rouge_keys]) + for key in rouge_keys: + if key not in ALLOWED_ROUGE_KEYS.keys(): + raise ValueError(f"Got unknown rouge key {key}. Expected to be one of {list(ALLOWED_ROUGE_KEYS.keys())}") + rouge_keys_values = [ALLOWED_ROUGE_KEYS[key] for key in rouge_keys] + + if isinstance(preds, str): + preds = [preds] + + if isinstance(targets, str): + targets = [targets] + + sentence_results: Dict[Union[int, str], List[Dict[str, Tensor]]] = _rouge_score_update( + preds, targets, rouge_keys_values, stemmer=stemmer + ) + + output: Dict[str, List[Tensor]] = {} + for rouge_key in rouge_keys_values: + for type in ["fmeasure", "precision", "recall"]: + output[f"rouge{rouge_key}_{type}"] = [] + + for rouge_key, metrics in sentence_results.items(): + for metric in metrics: + for type, value in metric.items(): + output[f"rouge{rouge_key}_{type}"].append(value) + + return _rouge_score_compute(output) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/text/sacre_bleu.py b/RE/paddlemetric/src/paddlemetrics/functional/text/sacre_bleu.py new file mode 100644 index 00000000..1a59377f --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/text/sacre_bleu.py @@ -0,0 +1,355 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# referenced from +# Library Name: torchtext +# Authors: torchtext authors and @sluks +# Date: 2020-07-18 +# Link: https://pyB.org/text/_modules/torchtext/data/metrics.html#bleu_score + +############## + +# Copyright 2017--2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not +# use this file except in compliance with the License. 
A copy of the License +# is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. + +############## + +# MIT License +# Copyright (c) 2017 - Shujian Huang + + +import re +from typing import Sequence + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor +from typing_extensions import Literal + +from paddlemetrics.functional.text.bleu import _bleu_score_compute, _bleu_score_update +from paddlemetrics.utilities.imports import _REGEX_AVAILABLE + +AVAILABLE_TOKENIZERS = ("none", "13a", "zh", "intl", "char") + +_UCODE_RANGES = ( + ("\u3400", "\u4db5"), # CJK Unified Ideographs Extension A, release 3.0 + ("\u4e00", "\u9fa5"), # CJK Unified Ideographs, release 1.1 + ("\u9fa6", "\u9fbb"), # CJK Unified Ideographs, release 4.1 + ("\uf900", "\ufa2d"), # CJK Compatibility Ideographs, release 1.1 + ("\ufa30", "\ufa6a"), # CJK Compatibility Ideographs, release 3.2 + ("\ufa70", "\ufad9"), # CJK Compatibility Ideographs, release 4.1 + ("\u20000", "\u2a6d6"), # (UTF16) CJK Unified Ideographs Extension B, release 3.1 + ("\u2f800", "\u2fa1d"), # (UTF16) CJK Compatibility Supplement, release 3.1 + ("\uff00", "\uffef"), # Full width ASCII, full width of English punctuation, + # half width Katakana, half wide half width kana, Korean alphabet + ("\u2e80", "\u2eff"), # CJK Radicals Supplement + ("\u3000", "\u303f"), # CJK punctuation mark + ("\u31c0", "\u31ef"), # CJK stroke + ("\u2f00", "\u2fdf"), # Kangxi Radicals + ("\u2ff0", "\u2fff"), # Chinese character structure + ("\u3100", "\u312f"), # Phonetic symbols + ("\u31a0", "\u31bf"), # Phonetic symbols (Taiwanese and Hakka expansion) + ("\ufe10", "\ufe1f"), + ("\ufe30", "\ufe4f"), + ("\u2600", "\u26ff"), + ("\u2700", "\u27bf"), + ("\u3200", "\u32ff"), + ("\u3300", "\u33ff"), +) + + +class _SacreBLEUTokenizer: + """Tokenizer used for SacreBLEU calculation. 
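+
+    Usage sketch (an editorial addition; output shown for illustration only): the class-level ``tokenize``
+    helper applies the selected scheme and returns the tokens, e.g. the default ``13a`` scheme splits
+    punctuation into separate tokens:
+
+    >>> _SacreBLEUTokenizer.tokenize("Hello, world!", "13a")  # doctest: +SKIP
+    ['Hello', ',', 'world', '!']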
+ + Source: https://github.com/mjpost/sacrebleu/tree/master/sacrebleu/tokenizers + """ + + _REGEX = ( + # language-dependent part (assuming Western languages) + (re.compile(r"([\{-\~\[-\` -\&\(-\+\:-\@\/])"), r" \1 "), + # tokenize period and comma unless preceded by a digit + (re.compile(r"([^0-9])([\.,])"), r"\1 \2 "), + # tokenize period and comma unless followed by a digit + (re.compile(r"([\.,])([^0-9])"), r" \1 \2"), + # tokenize dash when preceded by a digit + (re.compile(r"([0-9])(-)"), r"\1 \2 "), + # one space only between words + # NOTE: Doing this in Python (below) is faster + # (re.compile(r'\s+'), r' '), + ) + + if _REGEX_AVAILABLE: + import regex + + _INT_REGEX = ( + # Separate out punctuations preceeded by a non-digit + (regex.compile(r"(\P{N})(\p{P})"), r"\1 \2 "), + # Separate out punctuations followed by a non-digit + (regex.compile(r"(\p{P})(\P{N})"), r" \1 \2"), + # Separate out symbols + (regex.compile(r"(\p{S})"), r" \1 "), + ) + + _TOKENIZE_FN = { + "none": "_tokenize_base", + "13a": "_tokenize_13a", + "zh": "_tokenize_zh", + "intl": "_tokenize_international", + "char": "_tokenize_char", + } + + def __init__(self, tokenize: Literal["none", "13a", "zh", "intl", "char"], lowercase: bool = False) -> None: + self.tokenize_fn = getattr(self, self._TOKENIZE_FN[tokenize]) + self.lowercase = lowercase + + def __call__(self, line: str) -> Sequence[str]: + tokenized_line = self.tokenize_fn(line) + return self._lower(tokenized_line, self.lowercase).split() + + @classmethod + def tokenize( + cls, line: str, tokenize: Literal["none", "13a", "zh", "intl", "char"], lowercase: bool = False + ) -> Sequence[str]: + tokenize_fn = getattr(cls, cls._TOKENIZE_FN[tokenize]) + tokenized_line = tokenize_fn(line) + return cls._lower(tokenized_line, lowercase).split() + + @classmethod + def _tokenize_regex(cls, line: str) -> str: + """Common post-processing tokenizer for `13a` and `zh` tokenizers. + Args: + line: a segment to tokenize + + Return: + the tokenized line + """ + for (_re, repl) in cls._REGEX: + line = _re.sub(repl, line) + # no leading or trailing spaces, single space within words + return " ".join(line.split()) + + @staticmethod + def _is_chinese_char(uchar: str) -> bool: + """ + Args: + uchar: input char in unicode + + Return: + whether the input char is a Chinese character. + """ + for start, end in _UCODE_RANGES: + if start <= uchar <= end: + return True + return False + + @classmethod + def _tokenize_base(cls, line: str) -> str: + """Tokenizes an input line with the tokenizer. + + Args: + line: a segment to tokenize + + Return: + the tokenized line + """ + return line + + @classmethod + def _tokenize_13a(cls, line: str) -> str: + """Tokenizes an input line using a relatively minimal tokenization that is however equivalent to + mteval-v13a, used by WMT. + + Args: + line: input sentence + + Return: + tokenized sentence + """ + # language-independent part: + line = line.replace("", "") + line = line.replace("-\n", "") + line = line.replace("\n", " ") + + if "&" in line: + line = line.replace(""", '"') + line = line.replace("&", "&") + line = line.replace("<", "<") + line = line.replace(">", ">") + + return cls._tokenize_regex(line) + + @classmethod + def _tokenize_zh(cls, line: str) -> str: + """The tokenization of Chinese text in this script contains two + steps: separate each Chinese characters (by utf-8 encoding); tokenize + the non Chinese part (following the `13a` i.e. mteval tokenizer). 
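+
+        For instance (an editorial sketch; output shown for illustration only), a purely Chinese segment
+        is split into individual characters:
+
+        >>> _SacreBLEUTokenizer.tokenize("猫坐在垫子上", "zh")  # doctest: +SKIP
+        ['猫', '坐', '在', '垫', '子', '上']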
+ Author: Shujian Huang huangsj@nju.edu.cn + + Args: + line: input sentence + + Return: + tokenized sentence + """ + + line = line.strip() + line_in_chars = "" + + for char in line: + if cls._is_chinese_char(char): + line_in_chars += " " + line_in_chars += char + line_in_chars += " " + else: + line_in_chars += char + + return cls._tokenize_regex(line_in_chars) + + @classmethod + def _tokenize_international(cls, line: str) -> str: + """Tokenizes a string following the official BLEU implementation. + + See github.com/moses-smt/mosesdecoder/blob/master/scripts/generic/mteval-v14.pl#L954-L983 + + In our case, the input string is expected to be just one line. + We just tokenize on punctuation and symbols, + except when a punctuation is preceded and followed by a digit + (e.g. a comma/dot as a thousand/decimal separator). + We do not recover escaped forms of punctuations such as ' or > + as these should never appear in MT system outputs (see issue #138) + + Note that a number (e.g., a year) followed by a dot at the end of + sentence is NOT tokenized, i.e. the dot stays with the number because + `s/(\\p{P})(\\P{N})/ $1 $2/g` does not match this case (unless we add a + space after each sentence). However, this error is already in the + original mteval-v14.pl and we want to be consistent with it. + The error is not present in the non-international version, + which uses `$norm_text = " $norm_text "`. + + Args: + line: the input string to tokenize. + + Return: + The tokenized string. + """ + for (_re, repl) in cls._INT_REGEX: + line = _re.sub(repl, line) + + return " ".join(line.split()) + + @classmethod + def _tokenize_char(cls, line: str) -> str: + """Tokenizes all the characters in the input line. + + Args: + line: a segment to tokenize + + Return: + the tokenized line + """ + return " ".join(char for char in line) + + @staticmethod + def _lower(line: str, lowercase: bool) -> str: + if lowercase: + return line.lower() + return line + + +def sacre_bleu_score( + reference_corpus: Sequence[Sequence[str]], + translate_corpus: Sequence[str], + n_gram: int = 4, + smooth: bool = False, + tokenize: Literal["none", "13a", "zh", "intl", "char"] = "13a", + lowercase: bool = False, +) -> Tensor: + """Calculate `BLEU score`_ [1] of machine translated text with one or more references. This implementation + follows the behaviour of SacreBLEU [2] implementation from https://github.com/mjpost/sacrebleu. + + Args: + reference_corpus: + An iterable of iterables of reference corpus + translate_corpus: + An iterable of machine translated corpus + n_gram: + Gram value ranged from 1 to 4 (Default 4) + smooth: + Whether or not to apply smoothing – see [2] + tokenize: + Tokenization technique to be used. (Default '13a') + Supported tokenization: ['none', '13a', 'zh', 'intl', 'char'] + lowercase: + If ``True``, BLEU score over lowercased text is calculated. + + Return: + Tensor with BLEU Score + + Example: + >>> from paddlemetrics.functional import sacre_bleu_score + >>> translate_corpus = ['the cat is on the mat'] + >>> reference_corpus = [['there is a cat on the mat', 'a cat is on the mat']] + >>> sacre_bleu_score(reference_corpus, translate_corpus) + tensor(0.7598) + + References: + [1] BLEU: a Method for Automatic Evaluation of Machine Translation by Papineni, + Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu `BLEU`_ + + [2] A Call for Clarity in Reporting BLEU Scores by Matt Post. 
+ + [3] Automatic Evaluation of Machine Translation Quality Using Longest Common Subsequence + and Skip-Bigram Statistics by Chin-Yew Lin and Franz Josef Och `Machine Translation Evolution`_ + """ + if tokenize not in AVAILABLE_TOKENIZERS: + raise ValueError(f"Argument `tokenize` expected to be one of {AVAILABLE_TOKENIZERS} but got {tokenize}.") + + if tokenize not in _SacreBLEUTokenizer._TOKENIZE_FN.keys(): + raise ValueError( + f"Unsupported tokenizer selected. Please, choose one of {list(_SacreBLEUTokenizer._TOKENIZE_FN.keys())}" + ) + if len(translate_corpus) != len(reference_corpus): + raise ValueError(f"Corpus has different size {len(translate_corpus)} != {len(reference_corpus)}") + if tokenize == "intl" and not _REGEX_AVAILABLE: + raise ValueError( + "`'intl'` tokenization requires `regex` installed. Use `pip install regex` or `pip install " + "paddlemetrics[text]`." + ) + + reference_corpus_: Sequence[Sequence[Sequence[str]]] = [ + [_SacreBLEUTokenizer.tokenize(line, tokenize, lowercase) for line in reference] + for reference in reference_corpus + ] + translate_corpus_: Sequence[Sequence[str]] = [ + _SacreBLEUTokenizer.tokenize(line, tokenize, lowercase) for line in translate_corpus + ] + + numerator = B.zeros(n_gram) + denominator = B.zeros(n_gram) + trans_len = tensor(0, dtype=B.float) + ref_len = tensor(0, dtype=B.float) + + trans_len, ref_len = _bleu_score_update( + reference_corpus_, translate_corpus_, numerator, denominator, trans_len, ref_len, n_gram + ) + + return _bleu_score_compute(trans_len, ref_len, numerator, denominator, n_gram, smooth) diff --git a/RE/paddlemetric/src/paddlemetrics/functional/text/wer.py b/RE/paddlemetric/src/paddlemetrics/functional/text/wer.py new file mode 100644 index 00000000..4cd19b05 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/functional/text/wer.py @@ -0,0 +1,114 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List, Optional, Tuple, Union +from warnings import warn + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + + +def _edit_distance(prediction_tokens: List[str], reference_tokens: List[str]) -> int: + """Standard dynamic programming algorithm to compute the edit distance. 
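+
+    For example (an editorial sketch), a single deletion or a single substitution both cost one edit:
+
+    >>> _edit_distance("who is there".split(), "is there".split())
+    1
+    >>> _edit_distance("hello world".split(), "hello there".split())
+    1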
+ + Args: + prediction_tokens: A tokenized predicted sentence + reference_tokens: A tokenized reference sentence + + Returns: + (int) Edit distance between the predicted sentence and the reference sentence + """ + dp = [[0] * (len(reference_tokens) + 1) for _ in range(len(prediction_tokens) + 1)] + for i in range(len(prediction_tokens) + 1): + dp[i][0] = i + for j in range(len(reference_tokens) + 1): + dp[0][j] = j + for i in range(1, len(prediction_tokens) + 1): + for j in range(1, len(reference_tokens) + 1): + if prediction_tokens[i - 1] == reference_tokens[j - 1]: + dp[i][j] = dp[i - 1][j - 1] + else: + dp[i][j] = min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]) + 1 + return dp[-1][-1] + + +def _wer_update( + predictions: Union[str, List[str]], + references: Union[str, List[str]], +) -> Tuple[Tensor, Tensor]: + """Update the wer score with the current set of references and predictions. + + Args: + predictions: Transcription(s) to score as a string or list of strings + references: Reference(s) for each speech input as a string or list of strings + + Returns: + (Tensor) Number of edit operations to get from the reference to the prediction, summed over all samples + (Tensor) Number of words over all references + """ + if isinstance(predictions, str): + predictions = [predictions] + if isinstance(references, str): + references = [references] + errors = tensor(0, dtype=B.float) + total = tensor(0, dtype=B.float) + for prediction, reference in zip(predictions, references): + prediction_tokens = prediction.split() + reference_tokens = reference.split() + errors += _edit_distance(prediction_tokens, reference_tokens) + total += len(reference_tokens) + return errors, total + + +def _wer_compute(errors: Tensor, total: Tensor) -> Tensor: + """Compute the word error rate. + + Args: + errors: Number of edit operations to get from the reference to the prediction, summed over all samples + total: Number of words over all references + + Returns: + (Tensor) Word error rate + """ + return errors / total + + +def wer( + predictions: Union[str, List[str]], + references: Union[str, List[str]], + concatenate_texts: Optional[bool] = None, # TODO: remove in v0.7 +) -> Tensor: + """Word error rate (WER_) is a common metric of the performance of an automatic speech recognition system. This + value indicates the percentage of words that were incorrectly predicted. The lower the value, the better the + performance of the ASR system with a WER of 0 being a perfect score. + + Args: + predictions: Transcription(s) to score as a string or list of strings + references: Reference(s) for each speech input as a string or list of strings + concatenate_texts: Whether to concatenate all input texts or compute WER iteratively + This argument is deprecated in v0.6 and it will be removed in v0.7. 
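+
+    Internally (an editorial note), ``_wer_update`` sums edit operations and reference word counts over
+    the whole corpus and ``_wer_compute`` divides the two totals, so the result is a corpus-level rate
+    rather than an average of per-sentence rates:
+
+        >>> _wer_update(["hello world"], ["hello there world"])  # doctest: +SKIP
+        (tensor(1.), tensor(3.))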
+ + Returns: + (Tensor) Word error rate + + Examples: + >>> predictions = ["this is the prediction", "there is an other sample"] + >>> references = ["this is the reference", "there is another one"] + >>> wer(predictions=predictions, references=references) + tensor(0.5000) + """ + if concatenate_texts is not None: + warn("`concatenate_texts` has been deprecated in v0.6 and it will be removed in v0.7", DeprecationWarning) + errors, total = _wer_update(predictions, references) + return _wer_compute(errors, total) diff --git a/RE/paddlemetric/src/paddlemetrics/image/__init__.py b/RE/paddlemetric/src/paddlemetrics/image/__init__.py new file mode 100644 index 00000000..c3fb3568 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/image/__init__.py @@ -0,0 +1,19 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#from paddlemetrics.image.fid import FID # noqa: F401 +from paddlemetrics.image.inception import IS # noqa: F401 +from paddlemetrics.image.kid import KID # noqa: F401 +from paddlemetrics.image.lpip_similarity import LPIPS # noqa: F401 +from paddlemetrics.image.psnr import PSNR # noqa: F401 +from paddlemetrics.image.ssim import SSIM # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/image/fid.py b/RE/paddlemetric/src/paddlemetrics/image/fid.py new file mode 100644 index 00000000..6f2965db --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/image/fid.py @@ -0,0 +1,283 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
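+
+# ---------------------------------------------------------------------------------------
+# Editorial reference sketch (not used by this module): the closed-form FID between two
+# Gaussian feature distributions written with plain NumPy/SciPy, mirroring the formula
+# implemented by `_compute_fid` below. The helper name and defaults are illustrative only.
+# ---------------------------------------------------------------------------------------
+def _fid_numpy_sketch(real_feats, fake_feats, eps=1e-6):
+    """Return the FID between two [N, d] NumPy feature arrays (illustrative only)."""
+    import numpy as _np
+    from scipy import linalg as _linalg
+
+    mu1, mu2 = real_feats.mean(axis=0), fake_feats.mean(axis=0)
+    sigma1 = _np.cov(real_feats, rowvar=False)
+    sigma2 = _np.cov(fake_feats, rowvar=False)
+    diff = mu1 - mu2
+    # matrix square root of the covariance product; retry with a small diagonal offset if singular
+    covmean, _ = _linalg.sqrtm(sigma1.dot(sigma2), disp=False)
+    if not _np.isfinite(covmean).all():
+        offset = _np.eye(sigma1.shape[0]) * eps
+        covmean, _ = _linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset), disp=False)
+    return diff.dot(diff) + _np.trace(sigma1) + _np.trace(sigma2) - 2 * _np.trace(covmean.real)
+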
+from typing import Any, Callable, List, Optional, Union + +import numpy as np +import paddleext.torchapi as B +from paddleext.torchapi import Tensor +from paddleext.torchapi.autograd import Function + +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_info, rank_zero_warn +from paddlemetrics.utilities.data import dim_zero_cat +from paddlemetrics.utilities.imports import _SCIPY_AVAILABLE, _TORCH_FIDELITY_AVAILABLE + +if _TORCH_FIDELITY_AVAILABLE: + from torch_fidelity.feature_extractor_inceptionv3 import FeatureExtractorInceptionV3 +else: + + class FeatureExtractorInceptionV3(B.nn.Module): # type: ignore + pass + + +if _SCIPY_AVAILABLE: + import scipy + + +class NoTrainInceptionV3(FeatureExtractorInceptionV3): + def __init__( + self, + name: str, + features_list: List[str], + feature_extractor_weights_path: Optional[str] = None, + ) -> None: + super().__init__(name, features_list, feature_extractor_weights_path) + # put into evaluation mode + self.eval() + + def train(self, mode: bool) -> "NoTrainInceptionV3": + """the inception network should not be able to be switched away from evaluation mode.""" + return super().train(False) + + def forward(self, x: Tensor) -> Tensor: + out = super().forward(x) + return out[0].reshape(x.shape[0], -1) + + +class MatrixSquareRoot(Function): + """Square root of a positive definite matrix. + + All credit to: `Square Root of a Positive Definite Matrix`_ + """ + + @staticmethod + def forward(ctx: Any, input_data: Tensor) -> Tensor: + # TODO: update whenever pytorch gets an matrix square root function + # Issue: https://github.com/pytorch/pytorch/issues/9983 + m = input_data.detach().cpu().numpy().astype(np.float_) + scipy_res, _ = scipy.linalg.sqrtm(m, disp=False) + sqrtm = B.from_numpy(scipy_res.real).to(input_data) + ctx.save_for_backward(sqrtm) + return sqrtm + + @staticmethod + def backward(ctx: Any, grad_output: Tensor) -> Tensor: + grad_input = None + if ctx.needs_input_grad[0]: + (sqrtm,) = ctx.saved_tensors + sqrtm = sqrtm.data.cpu().numpy().astype(np.float_) + gm = grad_output.data.cpu().numpy().astype(np.float_) + + # Given a positive semi-definite matrix X, + # since X = X^{1/2}X^{1/2}, we can compute the gradient of the + # matrix square root dX^{1/2} by solving the Sylvester equation: + # dX = (d(X^{1/2})X^{1/2} + X^{1/2}(dX^{1/2}). + grad_sqrtm = scipy.linalg.solve_sylvester(sqrtm, sqrtm, gm) + + grad_input = B.from_numpy(grad_sqrtm).to(grad_output) + return grad_input + + +sqrtm = MatrixSquareRoot.apply + + +def _compute_fid(mu1: Tensor, sigma1: Tensor, mu2: Tensor, sigma2: Tensor, eps: float = 1e-6) -> Tensor: + r""" + Adjusted version of `Fid Score`_ + + The Frechet Inception Distance between two multivariate Gaussians X_x ~ N(mu_1, sigm_1) + and X_y ~ N(mu_2, sigm_2) is d^2 = ||mu_1 - mu_2||^2 + Tr(sigm_1 + sigm_2 - 2*sqrt(sigm_1*sigm_2)). + + Args: + mu1: mean of activations calculated on predicted (x) samples + sigma1: covariance matrix over activations calculated on predicted (x) samples + mu2: mean of activations calculated on target (y) samples + sigma2: covariance matrix over activations calculated on target (y) samples + eps: offset constant. used if sigma_1 @ sigma_2 matrix is singular + + Returns: + Scalar value of the distance between sets. 
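+
+    Example (an editorial sketch, skipped in doctests since it needs ``scipy``): two identical Gaussians
+    are at distance zero.
+
+        >>> mu, sigma = B.zeros(2), B.eye(2)  # doctest: +SKIP
+        >>> _compute_fid(mu, sigma, mu, sigma)  # doctest: +SKIP
+        tensor(0.)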
+ """ + diff = mu1 - mu2 + + covmean = sqrtm(sigma1.mm(sigma2)) + # Product might be almost singular + if not B.isfinite(covmean).all(): + rank_zero_info(f"FID calculation produces singular product; adding {eps} to diagonal of covariance estimates") + offset = B.eye(sigma1.size(0), device=mu1.device, dtype=mu1.dtype) * eps + covmean = sqrtm((sigma1 + offset).mm(sigma2 + offset)) + + tr_covmean = B.trace(covmean) + return diff.dot(diff) + B.trace(sigma1) + B.trace(sigma2) - 2 * tr_covmean + + +class FID(Metric): + r""" + Calculates Fréchet inception distance (FID_) which is used to access the quality of generated images. Given by + + .. math:: + FID = |\mu - \mu_w| + tr(\Sigma + \Sigma_w - 2(\Sigma \Sigma_w)^{\frac{1}{2}}) + + where :math:`\mathcal{N}(\mu, \Sigma)` is the multivariate normal distribution estimated from Inception v3 [1] + features calculated on real life images and :math:`\mathcal{N}(\mu_w, \Sigma_w)` is the multivariate normal + distribution estimated from Inception v3 features calculated on generated (fake) images. The metric was + originally proposed in [1]. + + Using the default feature extraction (Inception v3 using the original weights from [2]), the input is + expected to be mini-batches of 3-channel RGB images of shape (3 x H x W) with dtype uint8. All images + will be resized to 299 x 299 which is the size of the original training data. The boolian flag ``real`` + determines if the images should update the statistics of the real distribution or the fake distribution. + + .. note:: using this metrics requires you to have ``scipy`` install. Either install as ``pip install + paddlemetrics[image]`` or ``pip install scipy`` + + .. note:: using this metric with the default feature extractor requires that ``torch-fidelity`` + is installed. Either install as ``pip install paddlemetrics[image]`` or + ``pip install torch-fidelity`` + + .. note:: the ``forward`` method can be used but ``compute_on_step`` is disabled by default (oppesit of + all other metrics) as this metric does not really make sense to calculate on a single batch. This + means that by default ``forward`` will just call ``update`` underneat. + + Args: + feature: + Either an integer or ``nn.Module``: + + - an integer will indicate the inceptionv3 feature layer to choose. Can be one of the following: + 64, 192, 768, 2048 + - an ``nn.Module`` for using a custom feature extractor. Expects that its forward method returns + an ``[N,d]`` matrix where ``N`` is the batch size and ``d`` is the feature size. + + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. 
When ``None``, DDP + will be used to perform the allgather + + References: + [1] Rethinking the Inception Architecture for Computer Vision + Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, Zbigniew Wojna + https://arxiv.org/abs/1512.00567 + + [2] GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium, + Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, Sepp Hochreiter + https://arxiv.org/abs/1706.08500 + + Raises: + ValueError: + If ``feature`` is set to an ``int`` (default settings) and ``torch-fidelity`` is not installed + ValueError: + If ``feature`` is set to an ``int`` not in [64, 192, 768, 2048] + TypeError: + If ``feature`` is not an ``str``, ``int`` or ``B.nn.Module`` + + Example: + >>> import torchapi as B + >>> _ = B.manual_seed(123) + >>> from paddlemetrics import FID + >>> fid = FID(feature=64) # doctest: +SKIP + >>> # generate two slightly overlapping image intensity distributions + >>> imgs_dist1 = B.randint(0, 200, (100, 3, 299, 299), dtype=B.uint8) # doctest: +SKIP + >>> imgs_dist2 = B.randint(100, 255, (100, 3, 299, 299), dtype=B.uint8) # doctest: +SKIP + >>> fid.update(imgs_dist1, real=True) # doctest: +SKIP + >>> fid.update(imgs_dist2, real=False) # doctest: +SKIP + >>> fid.compute() # doctest: +SKIP + tensor(12.7202) + + """ + real_features: List[Tensor] + fake_features: List[Tensor] + + def __init__( + self, + feature: Union[int, B.nn.Module] = 2048, + compute_on_step: bool = False, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable[[Tensor], List[Tensor]] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + rank_zero_warn( + "Metric `FID` will save all extracted features in buffer." + " For large datasets this may lead to large memory footprint.", + UserWarning, + ) + + if isinstance(feature, int): + if not _TORCH_FIDELITY_AVAILABLE: + raise ValueError( + "FID metric requires that Torch-fidelity is installed." + "Either install as `pip install paddlemetrics[image]` or `pip install torch-fidelity`" + ) + valid_int_input = [64, 192, 768, 2048] + if feature not in valid_int_input: + raise ValueError( + f"Integer input to argument `feature` must be one of {valid_int_input}, but got {feature}." + ) + + self.inception = NoTrainInceptionV3(name="inception-v3-compat", features_list=[str(feature)]) + elif isinstance(feature, B.nn.Module): + self.inception = feature + else: + raise TypeError("Got unknown input to argument `feature`") + + self.add_state("real_features", [], dist_reduce_fx=None) + self.add_state("fake_features", [], dist_reduce_fx=None) + + def update(self, imgs: Tensor, real: bool) -> None: # type: ignore + """Update the state with extracted features. 
+ + Args: + imgs: tensor with images feed to the feature extractor + real: bool indicating if imgs belong to the real or the fake distribution + """ + features = self.inception(imgs) + + if real: + self.real_features.append(features) + else: + self.fake_features.append(features) + + def compute(self) -> Tensor: + """Calculate FID score based on accumulated extracted features from the two distributions.""" + real_features = dim_zero_cat(self.real_features) + fake_features = dim_zero_cat(self.fake_features) + # computation is extremely sensitive so it needs to happen in double precision + orig_dtype = real_features.dtype + real_features = real_features.double() + fake_features = fake_features.double() + + # calculate mean and covariance + n = real_features.shape[0] + mean1 = real_features.mean(dim=0) + mean2 = fake_features.mean(dim=0) + diff1 = real_features - mean1 + diff2 = fake_features - mean2 + cov1 = 1.0 / (n - 1) * diff1.t().mm(diff1) + cov2 = 1.0 / (n - 1) * diff2.t().mm(diff2) + + # compute fid + return _compute_fid(mean1, cov1, mean2, cov2).to(orig_dtype) diff --git a/RE/paddlemetric/src/paddlemetrics/image/inception.py b/RE/paddlemetric/src/paddlemetrics/image/inception.py new file mode 100644 index 00000000..6c05b9a4 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/image/inception.py @@ -0,0 +1,179 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, List, Optional, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +#from paddlemetrics.image.fid import NoTrainInceptionV3 +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn +from paddlemetrics.utilities.data import dim_zero_cat +from paddlemetrics.utilities.imports import _TORCH_FIDELITY_AVAILABLE + + +class IS(Metric): + r""" + Calculates the Inception Score (IS) which is used to access how realistic generated images are. + It is defined as + + .. math:: + IS = exp(\mathbb{E}_x KL(p(y | x ) || p(y))) + + where :math:`KL(p(y | x) || p(y))` is the KL divergence between the conditional distribution :math:`p(y|x)` + and the margianl distribution :math:`p(y)`. Both the conditional and marginal distribution is calculated + from features extracted from the images. The score is calculated on random splits of the images such that + both a mean and standard deviation of the score are returned. The metric was originally proposed in [1]. + + Using the default feature extraction (Inception v3 using the original weights from [2]), the input is + expected to be mini-batches of 3-channel RGB images of shape (3 x H x W) with dtype uint8. All images + will be resized to 299 x 299 which is the size of the original training data. + + .. note:: using this metric with the default feature extractor requires that ``torch-fidelity`` + is installed. Either install as ``pip install paddlemetrics[image]`` or + ``pip install torch-fidelity`` + + .. 
note:: the ``forward`` method can be used but ``compute_on_step`` is disabled by default (oppesit of + all other metrics) as this metric does not really make sense to calculate on a single batch. This + means that by default ``forward`` will just call ``update`` underneat. + + Args: + feature: + Either an str, integer or ``nn.Module``: + + - an str or integer will indicate the inceptionv3 feature layer to choose. Can be one of the following: + 'logits_unbiased', 64, 192, 768, 2048 + - an ``nn.Module`` for using a custom feature extractor. Expects that its forward method returns + an ``[N,d]`` matrix where ``N`` is the batch size and ``d`` is the feature size. + + splits: integer determining how many splits the inception score calculation should be split among + + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather + + References: + [1] Improved Techniques for Training GANs + Tim Salimans, Ian Goodfellow, Wojciech Zaremba, Vicki Cheung, Alec Radford, Xi Chen + https://arxiv.org/abs/1606.03498 + + [2] GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium, + Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, Sepp Hochreiter + https://arxiv.org/abs/1706.08500 + + Raises: + ValueError: + If ``feature`` is set to an ``str`` or ``int`` and ``torch-fidelity`` is not installed + ValueError: + If ``feature`` is set to an ``str`` or ``int`` and not one of ['logits_unbiased', 64, 192, 768, 2048] + TypeError: + If ``feature`` is not an ``str``, ``int`` or ``B.nn.Module`` + + Example: + >>> import torchapi as B + >>> _ = B.manual_seed(123) + >>> from paddlemetrics import IS + >>> inception = IS() # doctest: +SKIP + >>> # generate some images + >>> imgs = B.randint(0, 255, (100, 3, 299, 299), dtype=B.uint8) # doctest: +SKIP + >>> inception.update(imgs) # doctest: +SKIP + >>> inception.compute() # doctest: +SKIP + (tensor(1.0569), tensor(0.0113)) + + """ + features: List + + def __init__( + self, + feature: Union[str, int, B.nn.Module] = "logits_unbiased", + splits: int = 10, + compute_on_step: bool = False, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable[[Tensor], List[Tensor]] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + rank_zero_warn( + "Metric `IS` will save all extracted features in buffer." + " For large datasets this may lead to large memory footprint.", + UserWarning, + ) + + if isinstance(feature, (str, int)): + if not _TORCH_FIDELITY_AVAILABLE: + raise ValueError( + "IS metric requires that Torch-fidelity is installed." + "Either install as `pip install paddlemetrics[image]`" + " or `pip install torch-fidelity`" + ) + valid_int_input = ("logits_unbiased", 64, 192, 768, 2048) + if feature not in valid_int_input: + raise ValueError( + f"Integer input to argument `feature` must be one of {valid_int_input}," f" but got {feature}." 
+ ) + + self.inception = NoTrainInceptionV3(name="inception-v3-compat", features_list=[str(feature)]) + elif isinstance(feature, B.nn.Module): + self.inception = feature + else: + raise TypeError("Got unknown input to argument `feature`") + + self.splits = splits + self.add_state("features", [], dist_reduce_fx=None) + + def update(self, imgs: Tensor) -> None: # type: ignore + """Update the state with extracted features. + + Args: + imgs: tensor with images feed to the feature extractor + """ + features = self.inception(imgs) + self.features.append(features) + + def compute(self) -> Tuple[Tensor, Tensor]: + features = dim_zero_cat(self.features) + # random permute the features + idx = B.randperm(features.shape[0]) + features = features[idx] + + # calculate probs and logits + prob = features.softmax(dim=1) + log_prob = features.log_softmax(dim=1) + + # split into groups + prob = prob.chunk(self.splits, dim=0) + log_prob = log_prob.chunk(self.splits, dim=0) + + # calculate score per split + mean_prob = [p.mean(dim=0, keepdim=True) for p in prob] + kl_ = [p * (log_p - m_p.log()) for p, log_p, m_p in zip(prob, log_prob, mean_prob)] + kl_ = [k.sum(dim=1).mean().exp() for k in kl_] + kl = B.stack(kl_) + + # return mean and std + return kl.mean(), kl.std() diff --git a/RE/paddlemetric/src/paddlemetrics/image/kid.py b/RE/paddlemetric/src/paddlemetrics/image/kid.py new file mode 100644 index 00000000..2f3d3a6b --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/image/kid.py @@ -0,0 +1,277 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
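+
+# ---------------------------------------------------------------------------------------
+# Editorial reference sketch (not used by this module): an unbiased polynomial-kernel MMD
+# between two equally sized [N, d] NumPy feature arrays, mirroring `poly_kernel`, `poly_mmd`
+# and `maximum_mean_discrepancy` defined below. The helper name and defaults are illustrative.
+# ---------------------------------------------------------------------------------------
+def _poly_mmd_numpy_sketch(f_real, f_fake, degree=3, gamma=None, coef=1.0):
+    """Return the squared MMD under a polynomial kernel (illustrative only)."""
+    import numpy as _np
+
+    if gamma is None:
+        gamma = 1.0 / f_real.shape[1]
+    k_xx = (gamma * f_real.dot(f_real.T) + coef) ** degree
+    k_yy = (gamma * f_fake.dot(f_fake.T) + coef) ** degree
+    k_xy = (gamma * f_real.dot(f_fake.T) + coef) ** degree
+    m = k_xx.shape[0]
+    # drop diagonal (self-similarity) terms so the within-set sums are unbiased, as in the module code
+    value = (k_xx.sum() - _np.trace(k_xx) + k_yy.sum() - _np.trace(k_yy)) / (m * (m - 1))
+    return value - 2 * k_xy.sum() / m ** 2
+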
+from typing import Any, Callable, List, Optional, Tuple, Union
+
+import paddleext.torchapi as B
+from paddleext.torchapi import Tensor
+from paddleext.torchapi.nn import Module
+
+from paddlemetrics.image.fid import NoTrainInceptionV3
+from paddlemetrics.metric import Metric
+from paddlemetrics.utilities import rank_zero_warn
+from paddlemetrics.utilities.data import dim_zero_cat
+from paddlemetrics.utilities.imports import _TORCH_FIDELITY_AVAILABLE
+
+
+def maximum_mean_discrepancy(k_xx: Tensor, k_xy: Tensor, k_yy: Tensor) -> Tensor:
+    """Adapted from `KID Score`_"""
+    m = k_xx.shape[0]
+
+    diag_x = B.diag(k_xx)
+    diag_y = B.diag(k_yy)
+
+    kt_xx_sums = k_xx.sum(dim=-1) - diag_x
+    kt_yy_sums = k_yy.sum(dim=-1) - diag_y
+    k_xy_sums = k_xy.sum(dim=0)
+
+    kt_xx_sum = kt_xx_sums.sum()
+    kt_yy_sum = kt_yy_sums.sum()
+    k_xy_sum = k_xy_sums.sum()
+
+    value = (kt_xx_sum + kt_yy_sum) / (m * (m - 1))
+    value -= 2 * k_xy_sum / (m ** 2)
+    return value
+
+
+def poly_kernel(f1: Tensor, f2: Tensor, degree: int = 3, gamma: Optional[float] = None, coef: float = 1.0) -> Tensor:
+    """Adapted from `KID Score`_"""
+    if gamma is None:
+        gamma = 1.0 / f1.shape[1]
+    kernel = (f1 @ f2.T * gamma + coef) ** degree
+    return kernel
+
+
+def poly_mmd(
+    f_real: Tensor, f_fake: Tensor, degree: int = 3, gamma: Optional[float] = None, coef: float = 1.0
+) -> Tensor:
+    """Adapted from `KID Score`_"""
+    k_11 = poly_kernel(f_real, f_real, degree, gamma, coef)
+    k_22 = poly_kernel(f_fake, f_fake, degree, gamma, coef)
+    k_12 = poly_kernel(f_real, f_fake, degree, gamma, coef)
+    return maximum_mean_discrepancy(k_11, k_12, k_22)
+
+
+class KID(Metric):
+    r"""
+    Calculates Kernel Inception Distance (KID) which is used to assess the quality of generated images. Given by
+
+    .. math::
+        KID = MMD(f_{real}, f_{fake})^2
+
+    where :math:`MMD` is the maximum mean discrepancy and :math:`f_{real}, f_{fake}` are extracted features
+    from real and fake images, see [1] for more details. In particular, calculating the MMD requires the
+    evaluation of a polynomial kernel function :math:`k`
+
+    .. math::
+        k(x,y) = (\gamma * x^T y + coef)^{degree}
+
+    which controls the distance between two features. In practice the MMD is calculated over a number of
+    subsets so that both the mean and standard deviation of KID can be reported.
+
+    Using the default feature extraction (Inception v3 using the original weights from [2]), the input is
+    expected to be mini-batches of 3-channel RGB images of shape (3 x H x W) with dtype uint8. All images
+    will be resized to 299 x 299 which is the size of the original training data.
+
+    .. note:: using this metric with the default feature extractor requires that ``torch-fidelity``
+        is installed. Either install as ``pip install paddlemetrics[image]`` or
+        ``pip install torch-fidelity``
+
+    .. note:: the ``forward`` method can be used but ``compute_on_step`` is disabled by default (opposite of
+        all other metrics) as this metric does not really make sense to calculate on a single batch. This
+        means that by default ``forward`` will just call ``update`` underneath.
+
+    Args:
+        feature:
+            Either a str, an integer or an ``nn.Module``:
+
+            - a str or integer will indicate the inceptionv3 feature layer to choose. Can be one of the following:
+              'logits_unbiased', 64, 192, 768, 2048
+            - an ``nn.Module`` for using a custom feature extractor. Expects that its forward method returns
+              an ``[N,d]`` matrix where ``N`` is the batch size and ``d`` is the feature size.
+
+        subsets:
+            Number of subsets to calculate the mean and standard deviation scores over
+        subset_size:
+            Number of randomly picked samples in each subset
+        degree:
+            Degree of the polynomial kernel function
+        gamma:
+            Scale-length of polynomial kernel. If set to ``None``, it will be automatically set to the feature size
+        coef:
+            Bias term in the polynomial kernel.
+        compute_on_step:
+            Forward only calls ``update()`` and returns ``None`` if this is set to ``False``.
+        dist_sync_on_step:
+            Synchronize metric state across processes at each ``forward()``
+            before returning the value at the step
+        process_group:
+            Specify the process group on which synchronization is called.
+            default: ``None`` (which selects the entire world)
+        dist_sync_fn:
+            Callback that performs the allgather operation on the metric state. When ``None``, DDP
+            will be used to perform the allgather
+
+    References:
+        [1] Demystifying MMD GANs
+        Mikołaj Bińkowski, Danica J. Sutherland, Michael Arbel, Arthur Gretton
+        https://arxiv.org/abs/1801.01401
+
+        [2] GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium,
+        Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, Sepp Hochreiter
+        https://arxiv.org/abs/1706.08500
+
+    Raises:
+        RuntimeError:
+            If ``feature`` is set to a ``str`` or ``int`` (default settings) and ``torch-fidelity`` is not installed
+        ValueError:
+            If ``feature`` is set to an ``int`` not in [64, 192, 768, 2048]
+        ValueError:
+            If ``subsets`` is not an integer larger than 0
+        ValueError:
+            If ``subset_size`` is not an integer larger than 0
+        ValueError:
+            If ``degree`` is not an integer larger than 0
+        ValueError:
+            If ``gamma`` is neither ``None`` nor a float larger than 0
+        ValueError:
+            If ``coef`` is not a float larger than 0
+
+    Example:
+        >>> import torchapi as B
+        >>> _ = B.manual_seed(123)
+        >>> from paddlemetrics import KID
+        >>> kid = KID(subset_size=50)  # doctest: +SKIP
+        >>> # generate two slightly overlapping image intensity distributions
+        >>> imgs_dist1 = B.randint(0, 200, (100, 3, 299, 299), dtype=B.uint8)  # doctest: +SKIP
+        >>> imgs_dist2 = B.randint(100, 255, (100, 3, 299, 299), dtype=B.uint8)  # doctest: +SKIP
+        >>> kid.update(imgs_dist1, real=True)  # doctest: +SKIP
+        >>> kid.update(imgs_dist2, real=False)  # doctest: +SKIP
+        >>> kid_mean, kid_std = kid.compute()  # doctest: +SKIP
+        >>> print((kid_mean, kid_std))  # doctest: +SKIP
+        (tensor(0.0338), tensor(0.0025))
+
+    """
+    real_features: List[Tensor]
+    fake_features: List[Tensor]
+
+    def __init__(
+        self,
+        feature: Union[str, int, B.nn.Module] = 2048,
+        subsets: int = 100,
+        subset_size: int = 1000,
+        degree: int = 3,
+        gamma: Optional[float] = None,  # type: ignore
+        coef: float = 1.0,
+        compute_on_step: bool = False,
+        dist_sync_on_step: bool = False,
+        process_group: Optional[Any] = None,
+        dist_sync_fn: Callable = None,
+    ) -> None:
+        super().__init__(
+            compute_on_step=compute_on_step,
+            dist_sync_on_step=dist_sync_on_step,
+            process_group=process_group,
+            dist_sync_fn=dist_sync_fn,
+        )
+
+        rank_zero_warn(
+            "Metric `KID` will save all extracted features in buffer."
+            " For large datasets this may lead to large memory footprint.",
+            UserWarning,
+        )
+
+        if isinstance(feature, (str, int)):
+            if not _TORCH_FIDELITY_AVAILABLE:
+                raise RuntimeError(
+                    "KID metric requires that Torch-fidelity is installed."
+ " Either install as `pip install paddlemetrics[image]`" + " or `pip install torch-fidelity`" + ) + valid_int_input = ("logits_unbiased", 64, 192, 768, 2048) + if feature not in valid_int_input: + raise ValueError( + f"Integer input to argument `feature` must be one of {valid_int_input}," f" but got {feature}." + ) + + self.inception: Module = NoTrainInceptionV3(name="inception-v3-compat", features_list=[str(feature)]) + elif isinstance(feature, Module): + self.inception = feature + else: + raise TypeError("Got unknown input to argument `feature`") + + if not (isinstance(subsets, int) and subsets > 0): + raise ValueError("Argument `subsets` expected to be integer larger than 0") + self.subsets = subsets + + if not (isinstance(subset_size, int) and subset_size > 0): + raise ValueError("Argument `subset_size` expected to be integer larger than 0") + self.subset_size = subset_size + + if not (isinstance(degree, int) and degree > 0): + raise ValueError("Argument `degree` expected to be integer larger than 0") + self.degree = degree + + if gamma is not None and not (isinstance(gamma, float) and gamma > 0): + raise ValueError("Argument `gamma` expected to be `None` or float larger than 0") + self.gamma = gamma + + if not (isinstance(coef, float) and coef > 0): + raise ValueError("Argument `coef` expected to be float larger than 0") + self.coef = coef + + # states for extracted features + self.add_state("real_features", [], dist_reduce_fx=None) + self.add_state("fake_features", [], dist_reduce_fx=None) + + def update(self, imgs: Tensor, real: bool) -> None: # type: ignore + """Update the state with extracted features. + + Args: + imgs: tensor with images feed to the feature extractor + real: bool indicating if imgs belong to the real or the fake distribution + """ + features = self.inception(imgs) + + if real: + self.real_features.append(features) + else: + self.fake_features.append(features) + + def compute(self) -> Tuple[Tensor, Tensor]: + """Calculate KID score based on accumulated extracted features from the two distributions. Returns a tuple + of mean and standard deviation of KID scores calculated on subsets of extracted features. + + Implementation inspired by `Fid Score`_ + """ + real_features = dim_zero_cat(self.real_features) + fake_features = dim_zero_cat(self.fake_features) + + n_samples_real = real_features.shape[0] + if n_samples_real < self.subset_size: + raise ValueError("Argument `subset_size` should be smaller than the number of samples") + n_samples_fake = fake_features.shape[0] + if n_samples_fake < self.subset_size: + raise ValueError("Argument `subset_size` should be smaller than the number of samples") + + kid_scores_ = [] + for _ in range(self.subsets): + perm = B.randperm(n_samples_real) + f_real = real_features[perm[: self.subset_size]] + perm = B.randperm(n_samples_fake) + f_fake = fake_features[perm[: self.subset_size]] + + o = poly_mmd(f_real, f_fake, self.degree, self.gamma, self.coef) + kid_scores_.append(o) + kid_scores = B.stack(kid_scores_) + return kid_scores.mean(), kid_scores.std(unbiased=False) diff --git a/RE/paddlemetric/src/paddlemetrics/image/lpip_similarity.py b/RE/paddlemetric/src/paddlemetrics/image/lpip_similarity.py new file mode 100644 index 00000000..7cf6d03a --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/image/lpip_similarity.py @@ -0,0 +1,156 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Callable, List, Optional
+
+import paddleext.torchapi as B
+from paddleext.torchapi import Tensor
+
+from paddlemetrics.metric import Metric
+from paddlemetrics.utilities.imports import _LPIPS_AVAILABLE
+
+if _LPIPS_AVAILABLE:
+    from lpips import LPIPS as Lpips_backbone
+else:
+
+    class Lpips_backbone(B.nn.Module):  # type: ignore
+        pass
+
+
+class NoTrainLpips(Lpips_backbone):
+    def train(self, mode: bool) -> "NoTrainLpips":
+        """The network should not be able to be switched away from evaluation mode."""
+        return super().train(False)
+
+
+def _valid_img(img: Tensor) -> bool:
+    """Check that input is a valid image to the network."""
+    return img.ndim == 4 and img.shape[1] == 3 and img.min() >= -1.0 and img.max() <= 1.0
+
+
+class LPIPS(Metric):
+    """The Learned Perceptual Image Patch Similarity (`LPIPS_`) is used to judge the perceptual similarity between
+    two images. LPIPS essentially computes the similarity between the activations of two image patches for some
+    pre-defined network. This measure has been shown to match human perception well. A low LPIPS score means that
+    image patches are perceptually similar.
+
+    Both input image patches are expected to have shape `[N, 3, H, W]` and be normalized to the [-1,1]
+    range. The minimum size of `H, W` depends on the chosen backbone (see `net_type` arg).
+
+    .. note:: using this metric requires you to have the ``lpips`` package installed. Either install
+        as ``pip install paddlemetrics[image]`` or ``pip install lpips``
+
+    .. note:: this metric is not scriptable when using ``torch<1.8``. Please update your pytorch installation
+        if this is an issue.
+
+    Args:
+        net_type: str indicating backbone network type to use. Choose between `'alex'`, `'vgg'` or `'squeeze'`
+        reduction: str indicating how to reduce over the batch dimension. Choose between `'sum'` or `'mean'`.
+        compute_on_step:
+            Forward only calls ``update()`` and returns ``None`` if this is set to ``False``.
+        dist_sync_on_step:
+            Synchronize metric state across processes at each ``forward()``
+            before returning the value at the step
+        process_group:
+            Specify the process group on which synchronization is called.
+            default: ``None`` (which selects the entire world)
+        dist_sync_fn:
+            Callback that performs the allgather operation on the metric state.
When ``None``, DDP + will be used to perform the allgather + + Raises: + ValueError: + If ``lpips`` package is not installed + ValueError: + If ``net_type`` is not one of ``"vgg"``, ``"alex"`` or ``"squeeze"`` + ValueError: + If ``reduction`` is not one of ``"mean"`` or ``"sum"`` + + Example: + >>> import torchapi as B + >>> _ = B.manual_seed(123) + >>> from paddlemetrics import LPIPS + >>> lpips = LPIPS(net_type='vgg') + >>> img1 = B.rand(10, 3, 100, 100) + >>> img2 = B.rand(10, 3, 100, 100) + >>> lpips(img1, img2) + tensor([0.3566], grad_fn=) + """ + + is_differentiable = True + real_features: List[Tensor] + fake_features: List[Tensor] + + # due to the use of named tuple in the backbone the net variable cannot be scriptet + __jit_ignored_attributes__ = ["net"] + + def __init__( + self, + net_type: str = "alex", + reduction: str = "mean", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable[[Tensor], List[Tensor]] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + if not _LPIPS_AVAILABLE: + raise ValueError( + "LPIPS metric requires that lpips is installed." + "Either install as `pip install paddlemetrics[image]` or `pip install lpips`" + ) + + valid_net_type = ("vgg", "alex", "squeeze") + if net_type not in valid_net_type: + raise ValueError(f"Argument `net_type` must be one of {valid_net_type}, but got {net_type}.") + self.net = NoTrainLpips(net=net_type, verbose=False) + + valid_reduction = ("mean", "sum") + if reduction not in valid_reduction: + raise ValueError(f"Argument `reduction` must be one of {valid_reduction}, but got {reduction}") + self.reduction = reduction + + self.add_state("sum_scores", B.zeros(1), dist_reduce_fx="sum") + self.add_state("total", B.zeros(1), dist_reduce_fx="sum") + + def update(self, img1: Tensor, img2: Tensor) -> None: # type: ignore + """Update internal states with lpips score. + + Args: + img1: tensor with images of shape [N, 3, H, W] + img2: tensor with images of shape [N, 3, H, W] + """ + if not (_valid_img(img1) and _valid_img(img2)): + raise ValueError( + "Expected both input arguments to be normalized tensors (all values in range [-1,1])" + f" and to have shape [N, 3, H, W] but `img1` have shape {img1.shape} with values in" + f" range {[img1.min(), img1.max()]} and `img2` have shape {img2.shape} with value" + f" in range {[img2.min(), img2.max()]}" + ) + + loss = self.net(img1, img2).squeeze() + self.sum_scores += loss.sum() + self.total += img1.shape[0] + + def compute(self) -> Tensor: + """Compute final perceptual similarity metric.""" + if self.reduction == "mean": + return self.sum_scores / self.total + if self.reduction == "sum": + return self.sum_scores diff --git a/RE/paddlemetric/src/paddlemetrics/image/psnr.py b/RE/paddlemetric/src/paddlemetrics/image/psnr.py new file mode 100644 index 00000000..3226203d --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/image/psnr.py @@ -0,0 +1,147 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Optional, Sequence, Tuple, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.image.psnr import _psnr_compute, _psnr_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn + + +class PSNR(Metric): + r""" + Computes `Computes Peak Signal-to-Noise Ratio`_ (PSNR): + + .. math:: \text{PSNR}(I, J) = 10 * \log_{10} \left(\frac{\max(I)^2}{\text{MSE}(I, J)}\right) + + Where :math:`\text{MSE}` denotes the `mean-squared-error`_ function. + + Args: + data_range: + the range of the data. If None, it is determined from the data (max - min). + The ``data_range`` must be given when ``dim`` is not None. + base: a base of a logarithm to use (default: 10) + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + + dim: + Dimensions to reduce PSNR scores over, provided as either an integer or a list of integers. Default is + None meaning scores will be reduced across all dimensions and all batches. + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Raises: + ValueError: + If ``dim`` is not ``None`` and ``data_range`` is not given. + + Example: + >>> from paddlemetrics import PSNR + >>> psnr = PSNR() + >>> preds = B.tensor([[0.0, 1.0], [2.0, 3.0]]) + >>> target = B.tensor([[3.0, 2.0], [1.0, 0.0]]) + >>> psnr(preds, target) + tensor(2.5527) + + .. note:: + Half precision is only support on GPU for this metric + + """ + min_target: Tensor + max_target: Tensor + + def __init__( + self, + data_range: Optional[float] = None, + base: float = 10.0, + reduction: str = "elementwise_mean", + dim: Optional[Union[int, Tuple[int, ...]]] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + + if dim is None and reduction != "elementwise_mean": + rank_zero_warn(f"The `reduction={reduction}` will not have any effect when `dim` is None.") + + if dim is None: + self.add_state("sum_squared_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + else: + self.add_state("sum_squared_error", default=[]) + self.add_state("total", default=[]) + + if data_range is None: + if dim is not None: + # Maybe we could use `B.amax(target, dim=dim) - B.amin(target, dim=dim)` in PyTorch 1.7 to + # calculate `data_range` in the future. 
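+                # A per-dimension reduction would need a running min/max per reduced slice,
+                # which is not tracked here; the global min/max fallback below only covers
+                # the ``dim is None`` case, so ``data_range`` must be supplied explicitly.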
+ raise ValueError("The `data_range` must be given when `dim` is not None.") + + self.data_range = None + self.add_state("min_target", default=tensor(0.0), dist_reduce_fx=B.min) + self.add_state("max_target", default=tensor(0.0), dist_reduce_fx=B.max) + else: + self.add_state("data_range", default=tensor(float(data_range)), dist_reduce_fx="mean") + self.base = base + self.reduction = reduction + self.dim = tuple(dim) if isinstance(dim, Sequence) else dim + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + sum_squared_error, n_obs = _psnr_update(preds, target, dim=self.dim) + if self.dim is None: + if self.data_range is None: + # keep track of min and max target values + self.min_target = min(target.min(), self.min_target) + self.max_target = max(target.max(), self.max_target) + + self.sum_squared_error += sum_squared_error + self.total += n_obs + else: + self.sum_squared_error.append(sum_squared_error) + self.total.append(n_obs) + + def compute(self) -> Tensor: + """Compute peak signal-to-noise ratio over state.""" + if self.data_range is not None: + data_range = self.data_range + else: + data_range = self.max_target - self.min_target + + if self.dim is None: + sum_squared_error = self.sum_squared_error + total = self.total + else: + sum_squared_error = B.cat([values.flatten() for values in self.sum_squared_error]) + total = B.cat([values.flatten() for values in self.total]) + return _psnr_compute(sum_squared_error, total, data_range, base=self.base, reduction=self.reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/image/ssim.py b/RE/paddlemetric/src/paddlemetrics/image/ssim.py new file mode 100644 index 00000000..f34a19b1 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/image/ssim.py @@ -0,0 +1,105 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, List, Optional, Sequence + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.image.ssim import _ssim_compute, _ssim_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn +from paddlemetrics.utilities.data import dim_zero_cat + + +class SSIM(Metric): + """Computes Structual Similarity Index Measure (SSIM_). + + Args: + kernel_size: size of the gaussian kernel (default: (11, 11)) + sigma: Standard deviation of the gaussian kernel (default: (1.5, 1.5)) + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied + + data_range: Range of the image. If ``None``, it is determined from the image (max - min) + k1: Parameter of SSIM. Default: 0.01 + k2: Parameter of SSIM. 
Default: 0.03 + + Return: + Tensor with SSIM score + + Example: + >>> from paddlemetrics import SSIM + >>> preds = B.rand([16, 1, 16, 16]) + >>> target = preds * 0.75 + >>> ssim = SSIM() + >>> ssim(preds, target) + tensor(0.9219) + """ + + preds: List[Tensor] + target: List[Tensor] + + def __init__( + self, + kernel_size: Sequence[int] = (11, 11), + sigma: Sequence[float] = (1.5, 1.5), + reduction: str = "elementwise_mean", + data_range: Optional[float] = None, + k1: float = 0.01, + k2: float = 0.03, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + rank_zero_warn( + "Metric `SSIM` will save all targets and" + " predictions in buffer. For large datasets this may lead" + " to large memory footprint." + ) + + self.add_state("preds", default=[], dist_reduce_fx="cat") + self.add_state("target", default=[], dist_reduce_fx="cat") + self.kernel_size = kernel_size + self.sigma = sigma + self.data_range = data_range + self.k1 = k1 + self.k2 = k2 + self.reduction = reduction + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + preds, target = _ssim_update(preds, target) + self.preds.append(preds) + self.target.append(target) + + def compute(self) -> Tensor: + """Computes explained variance over state.""" + preds = dim_zero_cat(self.preds) + target = dim_zero_cat(self.target) + return _ssim_compute( + preds, target, self.kernel_size, self.sigma, self.reduction, self.data_range, self.k1, self.k2 + ) diff --git a/RE/paddlemetric/src/paddlemetrics/metric.py b/RE/paddlemetric/src/paddlemetrics/metric.py new file mode 100644 index 00000000..21c2148b --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/metric.py @@ -0,0 +1,775 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
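The ``Metric`` base class defined in this file is what all of the metrics above build on: states are registered with ``add_state()``, accumulated in ``update()`` and reduced in ``compute()``, while ``forward()`` additionally reports a batch-local value. A minimal, illustrative subclass (hypothetical ``MeanAbsoluteDiff`` name; assumes the package is importable as laid out in this repo, not part of this patch) could look like:

```python
# Illustrative sketch only, not part of this patch.
import paddleext.torchapi as B
from paddleext.torchapi import Tensor, tensor
from paddlemetrics.metric import Metric

class MeanAbsoluteDiff(Metric):
    def __init__(self) -> None:
        super().__init__(compute_on_step=True)
        # "sum" states are added together across processes when the metric is synced
        self.add_state("abs_error", default=tensor(0.0), dist_reduce_fx="sum")
        self.add_state("n_samples", default=tensor(0.0), dist_reduce_fx="sum")

    def update(self, preds: Tensor, target: Tensor) -> None:
        # accumulate batch statistics; forward() calls this automatically
        self.abs_error += (preds - target).abs().sum()
        self.n_samples += tensor(float(preds.shape[0]))

    def compute(self) -> Tensor:
        # reduce the accumulated state to the final value
        return self.abs_error / self.n_samples

metric = MeanAbsoluteDiff()
metric.update(B.tensor([1.0, 2.0]), B.tensor([1.5, 2.5]))
print(metric.compute())  # 0.5 for this toy input
```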
+import functools +import inspect +import operator as op +from abc import ABC, abstractmethod +from collections.abc import Sequence +from contextlib import contextmanager +from copy import deepcopy +from typing import Any, Callable, Dict, Generator, List, Optional, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, Module + +from paddlemetrics.utilities import apply_to_collection, rank_zero_warn +from paddlemetrics.utilities.data import _flatten, dim_zero_cat, dim_zero_max, dim_zero_mean, dim_zero_min, dim_zero_sum +from paddlemetrics.utilities.distributed import gather_all_tensors +from paddlemetrics.utilities.exceptions import paddlemetricsUserError +from paddlemetrics.utilities.imports import _LIGHTNING_AVAILABLE, _compare_version + + +def jit_distributed_available() -> bool: + return B.distributed.is_available() and B.distributed.is_initialized() + + +class Metric(Module): + """Base class for all metrics present in the Metrics API. + + Implements ``add_state()``, ``forward()``, ``reset()`` and a few other things to + handle distributed synchronization and per-step metric computation. + + Override ``update()`` and ``compute()`` functions to implement your own metric. Use + ``add_state()`` to register metric state variables which keep track of state on each + call of ``update()`` and are synchronized across processes when ``compute()`` is called. + + Note: + Metric state variables can either be ``B.Tensors`` or an empty list which can we used + to store `B.Tensors``. + + Note: + Different metrics only override ``update()`` and not ``forward()``. A call to ``update()`` + is valid, but it won't return the metric value at the current step. A call to ``forward()`` + automatically calls ``update()`` and also returns the metric value at the current step. + + Args: + compute_on_step: + Forward only calls ``update()`` and returns None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. 
+ """ + + __jit_ignored_attributes__ = ["device"] + __jit_unused_properties__ = ["is_differentiable"] + is_differentiable: Optional[bool] = None + higher_is_better: Optional[bool] = None + + def __init__( + self, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__() + + # see (https://github.com/pytorch/pytorch/blob/3e6bb5233f9ca2c5aa55d9cda22a7ee85439aa6e/ + # B.nn/modules/module.py#L227) +# B._C._log_api_usage_once(f"paddlemetrics.metric.{self.__class__.__name__}") + +# self._LIGHTNING_GREATER_EQUAL_1_3 = _compare_version("pytorch_lightning", op.ge, "1.3.0") + self._device = B.device("cpu") + + self.dist_sync_on_step = dist_sync_on_step + self.compute_on_step = compute_on_step + self.process_group = process_group + self.dist_sync_fn = dist_sync_fn + self._to_sync = True + self._should_unsync = True + + self._update_signature = inspect.signature(self.update) + self.update: Callable = self._wrap_update(self.update) # type: ignore + self.compute: Callable = self._wrap_compute(self.compute) # type: ignore + self._computed = None + self._forward_cache = None + self._update_called = False + + # initialize state + self._defaults: Dict[str, Union[List, Tensor]] = {} + self._persistent: Dict[str, bool] = {} + self._reductions: Dict[str, Union[str, Callable[[Union[List[Tensor], Tensor]], Tensor], None]] = {} + + # state management + self._is_synced = False + self._cache: Optional[Dict[str, Union[List[Tensor], Tensor]]] = None + + def to(self, *args, **kwargs): + + return self + # result = super().to(*args, **kwargs) + # + # return result if result is not None else self + + def add_state( + self, + name: str, + default: Union[list, Tensor], + dist_reduce_fx: Optional[Union[str, Callable]] = None, + persistent: bool = False, + ) -> None: + """Adds metric state variable. Only used by subclasses. + + Args: + name: The name of the state variable. The variable will then be accessible at ``self.name``. + default: Default value of the state; can either be a ``B.Tensor`` or an empty list. The state will be + reset to this value when ``self.reset()`` is called. + dist_reduce_fx (Optional): Function to reduce state across multiple processes in distributed mode. + If value is ``"sum"``, ``"mean"``, ``"cat"``, ``"min"`` or ``"max"`` we will use ``B.sum``, + ``B.mean``, ``B.cat``, ``B.min`` and ``B.max``` respectively, each with argument + ``dim=0``. Note that the ``"cat"`` reduction only makes sense if the state is a list, and not + a tensor. The user can also pass a custom function in this parameter. + persistent (Optional): whether the state will be saved as part of the modules ``state_dict``. + Default is ``False``. + + Note: + Setting ``dist_reduce_fx`` to None will return the metric state synchronized across different processes. + However, there won't be any reduction function applied to the synchronized metric state. + + The metric states would be synced as follows + + - If the metric state is ``B.Tensor``, the synced value will be a stacked ``B.Tensor`` across + the process dimension if the metric state was a ``B.Tensor``. The original ``B.Tensor`` metric + state retains dimension and hence the synchronized output will be of shape ``(num_process, ...)``. + + - If the metric state is a ``list``, the synced value will be a ``list`` containing the + combined elements from all processes. 
+ + Note: + When passing a custom function to ``dist_reduce_fx``, expect the synchronized metric state to follow + the format discussed in the above note. + + Raises: + ValueError: + If ``default`` is not a ``tensor`` or an ``empty list``. + ValueError: + If ``dist_reduce_fx`` is not callable or one of ``"mean"``, ``"sum"``, ``"cat"``, ``None``. + """ + if not isinstance(default, (Tensor, list)) or (isinstance(default, list) and default): + raise ValueError(f"state variable must be a tensor or any empty list (where you can append tensors): {type(default)}") + + if dist_reduce_fx == "sum": + dist_reduce_fx = dim_zero_sum + elif dist_reduce_fx == "mean": + dist_reduce_fx = dim_zero_mean + elif dist_reduce_fx == "max": + dist_reduce_fx = dim_zero_max + elif dist_reduce_fx == "min": + dist_reduce_fx = dim_zero_min + elif dist_reduce_fx == "cat": + dist_reduce_fx = dim_zero_cat + elif dist_reduce_fx is not None and not callable(dist_reduce_fx): + raise ValueError("`dist_reduce_fx` must be callable or one of ['mean', 'sum', 'cat', None]") + + if isinstance(default, Tensor): + default = default.contiguous() + + setattr(self, name, default) + + self._defaults[name] = deepcopy(default) + self._persistent[name] = persistent + self._reductions[name] = dist_reduce_fx + +# @B.jit.unused + def forward(self, *args: Any, **kwargs: Any) -> Any: + """Automatically calls ``update()``. + + Returns the metric value over inputs if ``compute_on_step`` is True. + """ + # add current step + if self._is_synced: + raise paddlemetricsUserError( + "The Metric shouldn't be synced when performing ``update``. " + "HINT: Did you forget to call ``unsync`` ?." + ) + + with B.no_grad(): + self.update(*args, **kwargs) + + if self.compute_on_step: + self._to_sync = self.dist_sync_on_step + # skip restore cache operation from compute as cache is stored below. 
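+            # forward() reports the batch-local value by snapshotting the accumulated
+            # state, resetting, running update()/compute() on this batch alone, and then
+            # restoring the snapshot so that global accumulation continues unchanged.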
+ self._should_unsync = False + + # save context before switch + cache = {attr: getattr(self, attr) for attr in self._defaults} + + # call reset, update, compute, on single batch + self.reset() + self.update(*args, **kwargs) + self._forward_cache = self.compute() + + # restore context + for attr, val in cache.items(): + setattr(self, attr, val) + self._is_synced = False + + self._should_unsync = True + self._to_sync = True + self._computed = None + + return self._forward_cache + + def _sync_dist(self, dist_sync_fn: Callable = gather_all_tensors, process_group: Optional[Any] = None) -> None: + input_dict = {attr: getattr(self, attr) for attr in self._reductions} + + for attr, reduction_fn in self._reductions.items(): + # pre-concatenate metric states that are lists to reduce number of all_gather operations + if reduction_fn == dim_zero_cat and isinstance(input_dict[attr], list) and len(input_dict[attr]) > 1: + input_dict[attr] = [dim_zero_cat(input_dict[attr])] + + output_dict = apply_to_collection( + input_dict, + Tensor, + dist_sync_fn, + group=process_group or self.process_group, + ) + + for attr, reduction_fn in self._reductions.items(): + # pre-processing ops (stack or flatten for inputs) + if isinstance(output_dict[attr][0], Tensor): + output_dict[attr] = B.stack(output_dict[attr]) + elif isinstance(output_dict[attr][0], list): + output_dict[attr] = _flatten(output_dict[attr]) + + if not (callable(reduction_fn) or reduction_fn is None): + raise TypeError("reduction_fn must be callable or None") + reduced = reduction_fn(output_dict[attr]) if reduction_fn is not None else output_dict[attr] + setattr(self, attr, reduced) + + def _wrap_update(self, update: Callable) -> Callable: + @functools.wraps(update) + def wrapped_func(*args: Any, **kwargs: Any) -> Optional[Any]: + self._computed = None + self._update_called = True + return update(*args, **kwargs) + + return wrapped_func + + def sync( + self, + dist_sync_fn: Optional[Callable] = None, + process_group: Optional[Any] = None, + should_sync: bool = True, + distributed_available: Optional[Callable] = jit_distributed_available, + ) -> None: + """Sync function for manually controlling when metrics states should be synced across processes. + + Args: + dist_sync_fn: Function to be used to perform states synchronization + process_group: + Specify the process group on which synchronization is called. + default: None (which selects the entire world) + should_sync: Whether to apply to state synchronization. This will have an impact + only when running in a distributed setting. + distributed_available: Function to determine if we are running inside a distributed setting + """ + if self._is_synced and should_sync: + raise paddlemetricsUserError("The Metric has already been synced.") + + is_distributed = distributed_available() if callable(distributed_available) else None + + if not should_sync or not is_distributed: + return + + if dist_sync_fn is None: + dist_sync_fn = gather_all_tensors + + # cache prior to syncing + self._cache = {attr: getattr(self, attr) for attr in self._defaults} + + # sync + self._sync_dist(dist_sync_fn, process_group=process_group) + self._is_synced = True + + def unsync(self, should_unsync: bool = True) -> None: + """Unsync function for manually controlling when metrics states should be reverted back to their local + states. 
+ + Args: + should_unsync: Whether to perform unsync + """ + if not should_unsync: + return + + if not self._is_synced: + raise paddlemetricsUserError("The Metric has already been un-synced.") + + if self._cache is None: + raise paddlemetricsUserError("The internal cache should exist to unsync the Metric.") + + # if we synced, restore to cache so that we can continue to accumulate un-synced state + for attr, val in self._cache.items(): + setattr(self, attr, val) + self._is_synced = False + self._cache = None + + @contextmanager + def sync_context( + self, + dist_sync_fn: Optional[Callable] = None, + process_group: Optional[Any] = None, + should_sync: bool = True, + should_unsync: bool = True, + distributed_available: Optional[Callable] = jit_distributed_available, + ) -> Generator: + """Context manager to synchronize the states between processes when running in a distributed setting and + restore the local cache states after yielding. + + Args: + dist_sync_fn: Function to be used to perform states synchronization + process_group: + Specify the process group on which synchronization is called. + default: None (which selects the entire world) + should_sync: Whether to apply to state synchronization. This will have an impact + only when running in a distributed setting. + should_unsync: Whether to restore the cache state so that the metrics can + continue to be accumulated. + distributed_available: Function to determine if we are running inside a distributed setting + """ + self.sync( + dist_sync_fn=dist_sync_fn, + process_group=process_group, + should_sync=should_sync, + distributed_available=distributed_available, + ) + + yield + + self.unsync(should_unsync=self._is_synced and should_unsync) + + def _wrap_compute(self, compute: Callable) -> Callable: + @functools.wraps(compute) + def wrapped_func(*args: Any, **kwargs: Any) -> Any: + if not self._update_called: + rank_zero_warn( + f"The ``compute`` method of metric {self.__class__.__name__}" + " was called before the ``update`` method which may lead to errors," + " as metric states have not yet been updated.", + UserWarning, + ) + + # return cached value + if self._computed is not None: + return self._computed + + # compute relies on the sync context manager to gather the states across processes and apply reduction + # if synchronization happened, the current rank accumulated states will be restored to keep + # accumulation going if ``should_unsync=True``, + with self.sync_context( + dist_sync_fn=self.dist_sync_fn, should_sync=self._to_sync, should_unsync=self._should_unsync + ): + self._computed = compute(*args, **kwargs) + + return self._computed + + return wrapped_func + + @abstractmethod + def update(self, *_: Any, **__: Any) -> None: + """Override this method to update the state variables of your metric class.""" + + @abstractmethod + def compute(self) -> Any: + """Override this method to compute the final metric value from state variables synchronized across the + distributed backend.""" + + def reset(self) -> None: + """This method automatically resets the metric state variables to their default value.""" + self._update_called = False + self._forward_cache = None + # lower lightning versions requires this implicitly to log metric objects correctly in self.log +# if not _LIGHTNING_AVAILABLE or self._LIGHTNING_GREATER_EQUAL_1_3: + self._computed = None + + for attr, default in self._defaults.items(): + current_val = getattr(self, attr) + if isinstance(default, Tensor): + setattr(self, attr, 
default.detach().clone().to(current_val.device)) + else: + setattr(self, attr, []) + + # reset internal states + self._cache = None + self._is_synced = False + + def clone(self) -> "Metric": + """Make a copy of the metric.""" + return deepcopy(self) + + def __getstate__(self) -> Dict[str, Any]: + # ignore update and compute functions for pickling + return {k: v for k, v in self.__dict__.items() if k not in ["update", "compute", "_update_signature"]} + + def __setstate__(self, state: Dict[str, Any]) -> None: + # manually restore update and compute functions for pickling + self.__dict__.update(state) + self._update_signature = inspect.signature(self.update) + self.update: Callable = self._wrap_update(self.update) # type: ignore + self.compute: Callable = self._wrap_compute(self.compute) # type: ignore + + def __setattr__(self, name: str, value: Any) -> None: + if name in ("higher_is_better", "is_differentiable"): + raise RuntimeError(f"Can't change const `{name}`.") + super().__setattr__(name, value) + + @property + def device(self) -> "B.device": + """Return the device of the metric.""" + return self._device + + def type(self, dst_type: Union[str, B.dtype]) -> "Metric": + """Method override default and prevent dtype casting. + + Please use `metric.set_dtype(dtype)` instead. + """ + return self + + def float(self) -> "Metric": + """Method override default and prevent dtype casting. + + Please use `metric.set_dtype(dtype)` instead. + """ + return self + + def double(self) -> "Metric": + """Method override default and prevent dtype casting. + + Please use `metric.set_dtype(dtype)` instead. + """ + return self + + def half(self) -> "Metric": + """Method override default and prevent dtype casting. + + Please use `metric.set_dtype(dtype)` instead. + """ + return self + + def set_dtype(self, dst_type: Union[str, B.dtype]) -> None: + """Special version of `type` for transferring all metric states to specific dtype + Arguments: + dst_type (type or string): the desired type + """ + return super().type(dst_type) + + def _apply(self, fn: Callable, *args, **kwargs) -> Module: + """Overwrite _apply function such that we can also move metric states to the correct device when `.to`, + `.cuda`, etc methods are called.""" + this = super()._apply(fn, *args, **kwargs) + if this is None: # for paddle + this = self + # Also apply fn to metric states and defaults + for key, value in this._defaults.items(): + if isinstance(value, Tensor): + this._defaults[key] = fn(value, *args, **kwargs) + elif isinstance(value, Sequence): + this._defaults[key] = [fn(v, *args, **kwargs) for v in value] + + current_val = getattr(this, key) + if isinstance(current_val, Tensor): + setattr(this, key, fn(current_val, *args, **kwargs)) + elif isinstance(current_val, Sequence): + setattr(this, key, [fn(cur_v, *args, **kwargs) for cur_v in current_val]) + else: + raise TypeError( + "Expected metric state to be either a Tensor" f"or a list of Tensor, but encountered {current_val}" + ) + + # make sure to update the device attribute + # if the dummy tensor moves device by fn function we should also update the attribute + self._device = fn(B.zeros(1, device=self.device), *args, **kwargs).device + + # Additional apply to forward cache and computed attributes (may be nested) + if this._computed is not None: + this._computed = apply_to_collection(this._computed, Tensor, fn) + if this._forward_cache is not None: + this._forward_cache = apply_to_collection(this._forward_cache, Tensor, fn) + + return this + + def persistent(self, mode: bool = 
False) -> None: + """Method for post-init to change if metric states should be saved to its state_dict.""" + for key in self._persistent: + self._persistent[key] = mode + + def state_dict( + self, + destination: Dict[str, Any] = None, + prefix: str = "", + keep_vars: bool = False, + ) -> Optional[Dict[str, Any]]: + destination = super().state_dict(destination=destination, prefix=prefix, keep_vars=keep_vars) + # Register metric states to be part of the state_dict + for key in self._defaults: + if not self._persistent[key]: + continue + current_val = getattr(self, key) + if not keep_vars: + if isinstance(current_val, Tensor): + current_val = current_val.detach() + elif isinstance(current_val, list): + current_val = [cur_v.detach() if isinstance(cur_v, Tensor) else cur_v for cur_v in current_val] + destination[prefix + key] = deepcopy(current_val) # type: ignore + return destination + + def _load_from_state_dict( + self, + state_dict: dict, + prefix: str, + local_metadata: dict, + strict: bool, + missing_keys: List[str], + unexpected_keys: List[str], + error_msgs: List[str], + ) -> None: + """Loads metric states from state_dict.""" + + for key in self._defaults: + name = prefix + key + if name in state_dict: + setattr(self, key, state_dict.pop(name)) + super()._load_from_state_dict( + state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs + ) + + def _filter_kwargs(self, **kwargs: Any) -> Dict[str, Any]: + """filter kwargs such that they match the update signature of the metric.""" + + # filter all parameters based on update signature except those of + # type VAR_POSITIONAL (*args) and VAR_KEYWORD (**kwargs) + _params = (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD) + _sign_params = self._update_signature.parameters + filtered_kwargs = { + k: v for k, v in kwargs.items() if (k in _sign_params.keys() and _sign_params[k].kind not in _params) + } + + # if no kwargs filtered, return al kwargs as default + if not filtered_kwargs: + filtered_kwargs = kwargs + return filtered_kwargs + + def __hash__(self) -> int: + # we need to add the id here, since PyTorch requires a module hash to be unique. + # Internally, PyTorch nn.Module relies on that for children discovery + # (see https://github.com/pytorch/pytorch/blob/v1.9.0/B.nn/modules/module.py#L1544) + # For metrics that include tensors it is not a problem, + # since their hash is unique based on the memory location but we cannot rely on that for every metric. 
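+        # id(self) is mixed into the hash so two separate instances never collide,
+        # even when their (possibly empty) states compare equal.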
+ hash_vals = [self.__class__.__name__, id(self)] + + for key in self._defaults: + val = getattr(self, key) + # Special case: allow list values, so long + # as their elements are hashable + if hasattr(val, "__iter__") and not isinstance(val, Tensor): + hash_vals.extend(val) + else: + hash_vals.append(val) + + return hash(tuple(hash_vals)) + + def __add__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.add, self, other) + + def __and__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.bitwise_and, self, other) + + # Fixme: this shall return bool instead of Metric + def __eq__(self, other: "Metric") -> "Metric": # type: ignore + return CompositionalMetric(B.eq, self, other) + + def __floordiv__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.floor_divide, self, other) + + def __ge__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.ge, self, other) + + def __gt__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.gt, self, other) + + def __le__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.le, self, other) + + def __lt__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.lt, self, other) + + def __matmul__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.matmul, self, other) + + def __mod__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.fmod, self, other) + + def __mul__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.mul, self, other) + + # Fixme: this shall return bool instead of Metric + def __ne__(self, other: "Metric") -> "Metric": # type: ignore + return CompositionalMetric(B.ne, self, other) + + def __or__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.bitwise_or, self, other) + + def __pow__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.pow, self, other) + + def __radd__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.add, other, self) + + def __rand__(self, other: "Metric") -> "Metric": + # swap them since bitwise_and only supports that way and it's commutative + return CompositionalMetric(B.bitwise_and, self, other) + + def __rfloordiv__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.floor_divide, other, self) + + def __rmatmul__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.matmul, other, self) + + def __rmod__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.fmod, other, self) + + def __rmul__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.mul, other, self) + + def __ror__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.bitwise_or, other, self) + + def __rpow__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.pow, other, self) + + def __rsub__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.sub, other, self) + + def __rtruediv__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.true_divide, other, self) + + def __rxor__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.bitwise_xor, other, self) + + def __sub__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.sub, self, other) + + def __truediv__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.true_divide, self, other) + + def __xor__(self, other: "Metric") -> "Metric": + return CompositionalMetric(B.bitwise_xor, self, other) + + def __abs__(self) -> "Metric": + return 
CompositionalMetric(B.abs, self, None) + + def __inv__(self) -> "Metric": + return CompositionalMetric(B.bitwise_not, self, None) + + def __invert__(self) -> "Metric": + return self.__inv__() + + def __neg__(self) -> "Metric": + return CompositionalMetric(_neg, self, None) + + def __pos__(self) -> "Metric": + return CompositionalMetric(B.abs, self, None) + + def __getitem__(self, idx: int) -> "Metric": + return CompositionalMetric(lambda x: x[idx], self, None) + + +def _neg(x: Tensor) -> Tensor: + return -B.abs(x) + + +class CompositionalMetric(Metric): + """Composition of two metrics with a specific operator which will be executed upon metrics compute.""" + + def __init__( + self, + operator: Callable, + metric_a: Union[Metric, int, float, Tensor], + metric_b: Union[Metric, int, float, Tensor, None], + ) -> None: + """ + Args: + operator: the operator taking in one (if metric_b is None) + or two arguments. Will be applied to outputs of metric_a.compute() + and (optionally if metric_b is not None) metric_b.compute() + metric_a: first metric whose compute() result is the first argument of operator + metric_b: second metric whose compute() result is the second argument of operator. + For operators taking in only one input, this should be None + """ + super().__init__() + + self.op = operator + + if isinstance(metric_a, Tensor): + self.register_buffer("metric_a", metric_a) + else: + self.metric_a = metric_a + + if isinstance(metric_b, Tensor): + self.register_buffer("metric_b", metric_b) + else: + self.metric_b = metric_b + + def _sync_dist(self, dist_sync_fn: Optional[Callable] = None, process_group: Optional[Any] = None) -> None: + # No syncing required here. syncing will be done in metric_a and metric_b + pass + + def update(self, *args: Any, **kwargs: Any) -> None: + if isinstance(self.metric_a, Metric): + self.metric_a.update(*args, **self.metric_a._filter_kwargs(**kwargs)) + + if isinstance(self.metric_b, Metric): + self.metric_b.update(*args, **self.metric_b._filter_kwargs(**kwargs)) + + def compute(self) -> Any: + + # also some parsing for kwargs? + if isinstance(self.metric_a, Metric): + val_a = self.metric_a.compute() + else: + val_a = self.metric_a + + if isinstance(self.metric_b, Metric): + val_b = self.metric_b.compute() + else: + val_b = self.metric_b + + if val_b is None: + return self.op(val_a) + + return self.op(val_a, val_b) + + def reset(self) -> None: + if isinstance(self.metric_a, Metric): + self.metric_a.reset() + + if isinstance(self.metric_b, Metric): + self.metric_b.reset() + + def persistent(self, mode: bool = False) -> None: + if isinstance(self.metric_a, Metric): + self.metric_a.persistent(mode=mode) + if isinstance(self.metric_b, Metric): + self.metric_b.persistent(mode=mode) + + def __repr__(self) -> str: + _op_metrics = f"(\n {self.op.__name__}(\n {repr(self.metric_a)},\n {repr(self.metric_b)}\n )\n)" + repr_str = self.__class__.__name__ + _op_metrics + + return repr_str diff --git a/RE/paddlemetric/src/paddlemetrics/py.typed b/RE/paddlemetric/src/paddlemetrics/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/paddlemetrics/regression/__init__.py b/RE/paddlemetric/src/paddlemetrics/regression/__init__.py new file mode 100644 index 00000000..aafc1024 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/__init__.py @@ -0,0 +1,26 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddlemetrics.regression.cosine_similarity import CosineSimilarity # noqa: F401 +from paddlemetrics.regression.explained_variance import ExplainedVariance # noqa: F401 +from paddlemetrics.regression.mean_absolute_error import MeanAbsoluteError # noqa: F401 +from paddlemetrics.regression.mean_absolute_percentage_error import MeanAbsolutePercentageError # noqa: F401 +from paddlemetrics.regression.mean_squared_error import MeanSquaredError # noqa: F401 +from paddlemetrics.regression.mean_squared_log_error import MeanSquaredLogError # noqa: F401 +from paddlemetrics.regression.pearson import PearsonCorrcoef # noqa: F401 +from paddlemetrics.regression.r2 import R2Score # noqa: F401 +from paddlemetrics.regression.spearman import SpearmanCorrcoef # noqa: F401 +from paddlemetrics.regression.symmetric_mean_absolute_percentage_error import ( # noqa: F401 + SymmetricMeanAbsolutePercentageError, +) +from paddlemetrics.regression.tweedie_deviance import TweedieDevianceScore # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/regression/cosine_similarity.py b/RE/paddlemetric/src/paddlemetrics/regression/cosine_similarity.py new file mode 100644 index 00000000..3b2946e2 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/cosine_similarity.py @@ -0,0 +1,105 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, List, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.regression.cosine_similarity import _cosine_similarity_compute, _cosine_similarity_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities.data import dim_zero_cat + + +class CosineSimilarity(Metric): + r""" + Computes the `Cosine Similarity`_ + between targets and predictions: + + .. math:: + cos_{sim}(x,y) = \frac{x \cdot y}{||x|| \cdot ||y||} = + \frac{\sum_{i=1}^n x_i y_i}{\sqrt{\sum_{i=1}^n x_i^2}\sqrt{\sum_{i=1}^n y_i^2}} + + where :math:`y` is a tensor of target values, and :math:`x` is a tensor of predictions. + + Forward accepts + + - ``preds`` (float tensor): ``(N,d)`` + - ``target`` (float tensor): ``(N,d)`` + + Args: + reduction: how to reduce over the batch dimension using 'sum', 'mean' or 'none' + (taking the individual scores) + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. 
+ default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the all gather. + + Example: + >>> from paddlemetrics import CosineSimilarity + >>> target = B.tensor([[0, 1], [1, 1]]) + >>> preds = B.tensor([[0, 1], [0, 1]]) + >>> cosine_similarity = CosineSimilarity(reduction = 'mean') + >>> cosine_similarity(preds, target) + tensor(0.8536) + + """ + is_differentiable = True + preds: List[Tensor] + target: List[Tensor] + + def __init__( + self, + reduction: str = "sum", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + allowed_reduction = ("sum", "mean", "none", None) + if reduction not in allowed_reduction: + raise ValueError(f"Expected argument `reduction` to be one of {allowed_reduction} but got {reduction}") + self.reduction = reduction + + self.add_state("preds", [], dist_reduce_fx="cat") + self.add_state("target", [], dist_reduce_fx="cat") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update metric states with predictions and targets. + + Args: + preds: Predicted tensor with shape ``(N,d)`` + target: Ground truth tensor with shape ``(N,d)`` + """ + preds, target = _cosine_similarity_update(preds, target) + + self.preds.append(preds) + self.target.append(target) + + def compute(self) -> Tensor: + preds = dim_zero_cat(self.preds) + target = dim_zero_cat(self.target) + return _cosine_similarity_compute(preds, target, self.reduction) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/explained_variance.py b/RE/paddlemetric/src/paddlemetrics/regression/explained_variance.py new file mode 100644 index 00000000..226ac076 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/explained_variance.py @@ -0,0 +1,136 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional, Sequence, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.regression.explained_variance import ( + _explained_variance_compute, + _explained_variance_update, +) +from paddlemetrics.metric import Metric + + +class ExplainedVariance(Metric): + r""" + Computes `explained variance`_: + + .. math:: \text{ExplainedVariance} = 1 - \frac{\text{Var}(y - \hat{y})}{\text{Var}(y)} + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a + tensor of predictions. + + Forward accepts + + - ``preds`` (float tensor): ``(N,)`` or ``(N, ...)`` (multioutput) + - ``target`` (long tensor): ``(N,)`` or ``(N, ...)`` (multioutput) + + In the case of multioutput, as default the variances will be uniformly + averaged over the additional dimensions. 
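The `reduction` argument of `CosineSimilarity` above only controls how the per-row scores are aggregated. A plain-Python check of the docstring example (no paddle needed) makes the `'mean'` case concrete:

```python
import math

target = [[0.0, 1.0], [1.0, 1.0]]
preds  = [[0.0, 1.0], [0.0, 1.0]]

def cosine(x, y):
    dot = sum(a * b for a, b in zip(x, y))
    norm = math.sqrt(sum(a * a for a in x)) * math.sqrt(sum(b * b for b in y))
    return dot / norm

scores = [cosine(p, t) for p, t in zip(preds, target)]  # [1.0, 0.7071...]
print(sum(scores) / len(scores))                        # ~0.8536, matching the doctest
# reduction='sum' would return ~1.7071 and reduction='none' the per-row scores.
```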
Please see argument `multioutput` + for changing this behavior. + + Args: + multioutput: + Defines aggregation in the case of multiple output scores. Can be one + of the following strings (default is `'uniform_average'`.): + + * `'raw_values'` returns full set of scores + * `'uniform_average'` scores are uniformly averaged + * `'variance_weighted'` scores are weighted by their individual variances + + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Raises: + ValueError: + If ``multioutput`` is not one of ``"raw_values"``, ``"uniform_average"`` or ``"variance_weighted"``. + + Example: + >>> from paddlemetrics import ExplainedVariance + >>> target = B.tensor([3, -0.5, 2, 7]) + >>> preds = B.tensor([2.5, 0.0, 2, 8]) + >>> explained_variance = ExplainedVariance() + >>> explained_variance(preds, target) + tensor(0.9572) + + >>> target = B.tensor([[0.5, 1], [-1, 1], [7, -6]]) + >>> preds = B.tensor([[0, 2], [-1, 2], [8, -5]]) + >>> explained_variance = ExplainedVariance(multioutput='raw_values') + >>> explained_variance(preds, target) + tensor([0.9677, 1.0000]) + + """ + is_differentiable = True + n_obs: Tensor + sum_error: Tensor + sum_squared_error: Tensor + sum_target: Tensor + sum_squared_target: Tensor + + def __init__( + self, + multioutput: str = "uniform_average", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + allowed_multioutput = ("raw_values", "uniform_average", "variance_weighted") + if multioutput not in allowed_multioutput: + raise ValueError( + f"Invalid input to argument `multioutput`. Choose one of the following: {allowed_multioutput}" + ) + self.multioutput: str = multioutput + self.add_state("sum_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("sum_squared_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("sum_target", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("sum_squared_target", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("n_obs", default=tensor(0.0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. 
+ + Args: + preds: Predictions from model + target: Ground truth values + """ + n_obs, sum_error, sum_squared_error, sum_target, sum_squared_target = _explained_variance_update(preds, target) + self.n_obs = self.n_obs + n_obs + self.sum_error = self.sum_error + sum_error + self.sum_squared_error = self.sum_squared_error + sum_squared_error + self.sum_target = self.sum_target + sum_target + self.sum_squared_target = self.sum_squared_target + sum_squared_target + + def compute(self) -> Union[Tensor, Sequence[Tensor]]: + """Computes explained variance over state.""" + return _explained_variance_compute( + self.n_obs, + self.sum_error, + self.sum_squared_error, + self.sum_target, + self.sum_squared_target, + self.multioutput, + ) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/mean_absolute_error.py b/RE/paddlemetric/src/paddlemetrics/regression/mean_absolute_error.py new file mode 100644 index 00000000..8614bed2 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/mean_absolute_error.py @@ -0,0 +1,86 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.regression.mean_absolute_error import ( + _mean_absolute_error_compute, + _mean_absolute_error_update, +) +from paddlemetrics.metric import Metric + + +class MeanAbsoluteError(Metric): + r""" + `Computes Mean Absolute Error`_ (MAE): + + .. math:: \text{MAE} = \frac{1}{N}\sum_i^N | y_i - \hat{y_i} | + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a tensor of predictions. + + Args: + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Example: + >>> from paddlemetrics import MeanAbsoluteError + >>> target = B.tensor([3.0, -0.5, 2.0, 7.0]) + >>> preds = B.tensor([2.5, 0.0, 2.0, 8.0]) + >>> mean_absolute_error = MeanAbsoluteError() + >>> mean_absolute_error(preds, target) + tensor(0.5000) + """ + is_differentiable = True + sum_abs_error: Tensor + total: Tensor + + def __init__( + self, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.add_state("sum_abs_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. 
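`ExplainedVariance` never stores predictions; it keeps a handful of running sums and reconstructs the two variances at `compute()` time via Var(x) = E[x^2] - E[x]^2. A plain-Python sketch of that identity on the docstring values (the actual reduction lives in `_explained_variance_compute`; the state names below mirror the ones registered in `__init__`):

```python
target = [3.0, -0.5, 2.0, 7.0]
preds  = [2.5,  0.0, 2.0, 8.0]

n = len(target)
sum_error          = sum(t - p for t, p in zip(target, preds))
sum_squared_error  = sum((t - p) ** 2 for t, p in zip(target, preds))
sum_target         = sum(target)
sum_squared_target = sum(t ** 2 for t in target)

# Population variances recovered from the running sums.
var_diff   = sum_squared_error / n - (sum_error / n) ** 2
var_target = sum_squared_target / n - (sum_target / n) ** 2
print(1 - var_diff / var_target)   # ~0.9572, matching the doctest above
```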
+ + Args: + preds: Predictions from model + target: Ground truth values + """ + sum_abs_error, n_obs = _mean_absolute_error_update(preds, target) + + self.sum_abs_error += sum_abs_error + self.total += n_obs + + def compute(self) -> Tensor: + """Computes mean absolute error over state.""" + return _mean_absolute_error_compute(self.sum_abs_error, self.total) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/mean_absolute_percentage_error.py b/RE/paddlemetric/src/paddlemetrics/regression/mean_absolute_percentage_error.py new file mode 100644 index 00000000..66d9c091 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/mean_absolute_percentage_error.py @@ -0,0 +1,95 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.regression.mean_absolute_percentage_error import ( + _mean_absolute_percentage_error_compute, + _mean_absolute_percentage_error_update, +) +from paddlemetrics.metric import Metric + + +class MeanAbsolutePercentageError(Metric): + r""" + Computes `Mean Absolute Percentage Error`_ (MAPE): + + .. math:: \text{MAPE} = \frac{1}{n}\sum_1^n\frac{| y_i - \hat{y_i} |}{\max(\epsilon, y_i)} + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a tensor of predictions. + + Args: + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Note: + The epsilon value is taken from `scikit-learn's implementation of MAPE`_. + + Note: + MAPE output is a non-negative floating point. Best result is 0.0 . But it is important to note that, + bad predictions, can lead to arbitarily large values. Especially when some ``target`` values are close to 0. + This `MAPE implementation returns`_ a very large number instead of ``inf``. 
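Both notes are easy to see numerically. A plain-Python sketch of the clamped formula; the exact epsilon lives in the functional helper, so the float64 machine epsilon used by scikit-learn is assumed here:

```python
eps = 2.220446049250313e-16   # assumed: float64 machine epsilon, as in scikit-learn

def mape(preds, target):
    return sum(abs(t - p) / max(abs(t), eps) for p, t in zip(preds, target)) / len(target)

print(mape([0.9, 15.0], [1.0, 10.0]))   # 0.3 -- well-behaved targets, ordinary score
print(mape([0.1], [0.0]))               # ~4.5e14 -- a zero target dominates instead of giving inf
```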
+ + Example: + >>> from paddlemetrics import MeanAbsolutePercentageError + >>> target = B.tensor([1, 10, 1e6]) + >>> preds = B.tensor([0.9, 15, 1.2e6]) + >>> mean_abs_percentage_error = MeanAbsolutePercentageError() + >>> mean_abs_percentage_error(preds, target) + tensor(0.2667) + + """ + is_differentiable = True + sum_abs_per_error: Tensor + total: Tensor + + def __init__( + self, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.add_state("sum_abs_per_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0.0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + sum_abs_per_error, num_obs = _mean_absolute_percentage_error_update(preds, target) + + self.sum_abs_per_error += sum_abs_per_error + self.total += num_obs + + def compute(self) -> Tensor: + """Computes mean absolute percentage error over state.""" + return _mean_absolute_percentage_error_compute(self.sum_abs_per_error, self.total) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/mean_squared_error.py b/RE/paddlemetric/src/paddlemetrics/regression/mean_squared_error.py new file mode 100644 index 00000000..8c1c9245 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/mean_squared_error.py @@ -0,0 +1,91 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.regression.mean_squared_error import ( + _mean_squared_error_compute, + _mean_squared_error_update, +) +from paddlemetrics.metric import Metric + + +class MeanSquaredError(Metric): + r""" + Computes `mean squared error`_ (MSE): + + .. math:: \text{MSE} = \frac{1}{N}\sum_i^N(y_i - \hat{y_i})^2 + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a tensor of predictions. + + Args: + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + squared: + If True returns MSE value, if False returns RMSE value. 
+ + Example: + >>> from paddlemetrics import MeanSquaredError + >>> target = B.tensor([2.5, 5.0, 4.0, 8.0]) + >>> preds = B.tensor([3.0, 5.0, 2.5, 7.0]) + >>> mean_squared_error = MeanSquaredError() + >>> mean_squared_error(preds, target) + tensor(0.8750) + + """ + is_differentiable = True + sum_squared_error: Tensor + total: Tensor + + def __init__( + self, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + squared: bool = True, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.add_state("sum_squared_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + self.squared = squared + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + sum_squared_error, n_obs = _mean_squared_error_update(preds, target) + + self.sum_squared_error += sum_squared_error + self.total += n_obs + + def compute(self) -> Tensor: + """Computes mean squared error over state.""" + return _mean_squared_error_compute(self.sum_squared_error, self.total, squared=self.squared) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/mean_squared_log_error.py b/RE/paddlemetric/src/paddlemetrics/regression/mean_squared_log_error.py new file mode 100644 index 00000000..e36773b0 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/mean_squared_log_error.py @@ -0,0 +1,90 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.regression.mean_squared_log_error import ( + _mean_squared_log_error_compute, + _mean_squared_log_error_update, +) +from paddlemetrics.metric import Metric + + +class MeanSquaredLogError(Metric): + r""" + Computes `mean squared logarithmic error`_ (MSLE): + + .. math:: \text{MSLE} = \frac{1}{N}\sum_i^N (\log_e(1 + y_i) - \log_e(1 + \hat{y_i}))^2 + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a tensor of predictions. + + Args: + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Example: + >>> from paddlemetrics import MeanSquaredLogError + >>> target = B.tensor([2.5, 5, 4, 8]) + >>> preds = B.tensor([3, 5, 2.5, 7]) + >>> mean_squared_log_error = MeanSquaredLogError() + >>> mean_squared_log_error(preds, target) + tensor(0.0397) + + .. 
note:: + Half precision is only support on GPU for this metric + + """ + is_differentiable = True + sum_squared_log_error: Tensor + total: Tensor + + def __init__( + self, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.add_state("sum_squared_log_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + sum_squared_log_error, n_obs = _mean_squared_log_error_update(preds, target) + + self.sum_squared_log_error += sum_squared_log_error + self.total += n_obs + + def compute(self) -> Tensor: + """Compute mean squared logarithmic error over state.""" + return _mean_squared_log_error_compute(self.sum_squared_log_error, self.total) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/pearson.py b/RE/paddlemetric/src/paddlemetrics/regression/pearson.py new file mode 100644 index 00000000..7927392a --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/pearson.py @@ -0,0 +1,140 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, List, Optional, Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.regression.pearson import _pearson_corrcoef_compute, _pearson_corrcoef_update +from paddlemetrics.metric import Metric + + +def _final_aggregation( + means_x: Tensor, + means_y: Tensor, + vars_x: Tensor, + vars_y: Tensor, + corrs_xy: Tensor, + nbs: Tensor, +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + """Aggregate the statistics from multiple devices. 
+ + Formula taken from here: `Aggregate the statistics from multiple devices`_ + """ + # assert len(means_x) > 1 and len(means_y) > 1 and len(vars_x) > 1 and len(vars_y) > 1 and len(corrs_xy) > 1 + mx1, my1, vx1, vy1, cxy1, n1 = means_x[0], means_y[0], vars_x[0], vars_y[0], corrs_xy[0], nbs[0] + for i in range(1, len(means_x)): + mx2, my2, vx2, vy2, cxy2, n2 = means_x[i], means_y[i], vars_x[i], vars_y[i], corrs_xy[i], nbs[i] + + nb = n1 + n2 + mean_x = (n1 * mx1 + n2 * mx2) / nb + mean_y = (n1 * my1 + n2 * my2) / nb + var_x = 1 / (n1 + n2 - 1) * ((n1 - 1) * vx1 + (n2 - 1) * vx2 + ((n1 * n2) / (n1 + n2)) * (mx1 - mx2) ** 2) + var_y = 1 / (n1 + n2 - 1) * ((n1 - 1) * vy1 + (n2 - 1) * vy2 + ((n1 * n2) / (n1 + n2)) * (my1 - my2) ** 2) + + corr1 = n1 * cxy1 + n1 * (mx1 - mean_x) * (my1 - mean_y) + corr2 = n2 * cxy2 + n2 * (mx2 - mean_x) * (my2 - mean_y) + corr_xy = (corr1 + corr2) / (n1 + n2) + + mx1, my1, vx1, vy1, cxy1, n1 = mean_x, mean_y, var_x, var_y, corr_xy, nb + + return var_x, var_y, corr_xy, nb + + +class PearsonCorrcoef(Metric): + r""" + Computes `Pearson Correlation Coefficient`_: + + .. math:: + P_{corr}(x,y) = \frac{cov(x,y)}{\sigma_x \sigma_y} + + Where :math:`y` is a tensor of target values, and :math:`x` is a + tensor of predictions. + + Forward accepts + + - ``preds`` (float tensor): ``(N,)`` + - ``target``(float tensor): ``(N,)`` + + Args: + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Example: + >>> from paddlemetrics import PearsonCorrcoef + >>> target = B.tensor([3, -0.5, 2, 7]) + >>> preds = B.tensor([2.5, 0.0, 2, 8]) + >>> pearson = PearsonCorrcoef() + >>> pearson(preds, target) + tensor(0.9849) + + """ + is_differentiable = True + preds: List[Tensor] + target: List[Tensor] + mean_x: Tensor + mean_y: Tensor + var_x: Tensor + var_y: Tensor + corr_xy: Tensor + n_total: Tensor + + def __init__( + self, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + ) + + self.add_state("mean_x", default=B.zeros(1), dist_reduce_fx=None) + self.add_state("mean_y", default=B.zeros(1), dist_reduce_fx=None) + self.add_state("var_x", default=B.zeros(1), dist_reduce_fx=None) + self.add_state("var_y", default=B.zeros(1), dist_reduce_fx=None) + self.add_state("corr_xy", default=B.zeros(1), dist_reduce_fx=None) + self.add_state("n_total", default=B.zeros(1), dist_reduce_fx=None) + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. 
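`_final_aggregation` merges per-device running statistics with the usual pooled mean/variance/covariance identities. Below is a plain-Python check that merging two chunks reproduces the statistics of the concatenated data; it assumes, as the formulas above suggest, that the per-chunk variances are the unbiased (n - 1) estimates and that `corr_xy` is the mean-centred cross product divided by n:

```python
x1, y1 = [1.0, 2.0, 3.0], [2.0, 4.0, 5.0]
x2, y2 = [4.0, 5.0, 6.0], [9.0, 8.0, 12.0]

def chunk_stats(xs, ys):
    n = len(xs)
    mx, my = sum(xs) / n, sum(ys) / n
    vx = sum((x - mx) ** 2 for x in xs) / (n - 1)
    vy = sum((y - my) ** 2 for y in ys) / (n - 1)
    cxy = sum((x - mx) * (y - my) for x, y in zip(xs, ys)) / n
    return mx, my, vx, vy, cxy, n

mx1, my1, vx1, vy1, cxy1, n1 = chunk_stats(x1, y1)
mx2, my2, vx2, vy2, cxy2, n2 = chunk_stats(x2, y2)

# Same pooling formulas as in _final_aggregation above.
nb = n1 + n2
mean_x = (n1 * mx1 + n2 * mx2) / nb
mean_y = (n1 * my1 + n2 * my2) / nb
var_x = ((n1 - 1) * vx1 + (n2 - 1) * vx2 + (n1 * n2 / nb) * (mx1 - mx2) ** 2) / (nb - 1)
corr_xy = (n1 * cxy1 + n1 * (mx1 - mean_x) * (my1 - mean_y)
           + n2 * cxy2 + n2 * (mx2 - mean_x) * (my2 - mean_y)) / nb

# Reference values from the concatenated data.
_, _, vxa, _, cxya, _ = chunk_stats(x1 + x2, y1 + y2)
print(abs(var_x - vxa) < 1e-12, abs(corr_xy - cxya) < 1e-12)   # True True
```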
+ + Args: + preds: Predictions from model + target: Ground truth values + """ + self.mean_x, self.mean_y, self.var_x, self.var_y, self.corr_xy, self.n_total = _pearson_corrcoef_update( + preds, target, self.mean_x, self.mean_y, self.var_x, self.var_y, self.corr_xy, self.n_total + ) + + def compute(self) -> Tensor: + """Computes pearson correlation coefficient over state.""" + if self.mean_x.numel() > 1: # multiple devices, need further reduction + var_x, var_y, corr_xy, n_total = _final_aggregation( + self.mean_x, self.mean_y, self.var_x, self.var_y, self.corr_xy, self.n_total + ) + else: + var_x = self.var_x + var_y = self.var_y + corr_xy = self.corr_xy + n_total = self.n_total + + return _pearson_corrcoef_compute(var_x, var_y, corr_xy, n_total) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/r2.py b/RE/paddlemetric/src/paddlemetrics/regression/r2.py new file mode 100644 index 00000000..36db3d8d --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/r2.py @@ -0,0 +1,149 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.regression.r2 import _r2_score_compute, _r2_score_update +from paddlemetrics.metric import Metric + + +class R2Score(Metric): + r""" + Computes r2 score also known as `R2 Score_Coefficient Determination`_: + + .. math:: R^2 = 1 - \frac{SS_{res}}{SS_{tot}} + + where :math:`SS_{res}=\sum_i (y_i - f(x_i))^2` is the sum of residual squares, and + :math:`SS_{tot}=\sum_i (y_i - \bar{y})^2` is total sum of squares. Can also calculate + adjusted r2 score given by + + .. math:: R^2_{adj} = 1 - \frac{(1-R^2)(n-1)}{n-k-1} + + where the parameter :math:`k` (the number of independent regressors) should + be provided as the `adjusted` argument. + + Forward accepts + + - ``preds`` (float tensor): ``(N,)`` or ``(N, M)`` (multioutput) + - ``target`` (float tensor): ``(N,)`` or ``(N, M)`` (multioutput) + + In the case of multioutput, as default the variances will be uniformly + averaged over the additional dimensions. Please see argument `multioutput` + for changing this behavior. + + Args: + num_outputs: + Number of outputs in multioutput setting (default is 1) + adjusted: + number of independent regressors for calculating adjusted r2 score. + Default 0 (standard r2 score). + multioutput: + Defines aggregation in the case of multiple output scores. Can be one + of the following strings (default is ``'uniform_average'``.): + + * ``'raw_values'`` returns full set of scores + * ``'uniform_average'`` scores are uniformly averaged + * ``'variance_weighted'`` scores are weighted by their individual variances + + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. 
default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Raises: + ValueError: + If ``adjusted`` parameter is not an integer larger or equal to 0. + ValueError: + If ``multioutput`` is not one of ``"raw_values"``, ``"uniform_average"`` or ``"variance_weighted"``. + + Example: + >>> from paddlemetrics import R2Score + >>> target = B.tensor([3, -0.5, 2, 7]) + >>> preds = B.tensor([2.5, 0.0, 2, 8]) + >>> r2score = R2Score() + >>> r2score(preds, target) + tensor(0.9486) + + >>> target = B.tensor([[0.5, 1], [-1, 1], [7, -6]]) + >>> preds = B.tensor([[0, 2], [-1, 2], [8, -5]]) + >>> r2score = R2Score(num_outputs=2, multioutput='raw_values') + >>> r2score(preds, target) + tensor([0.9654, 0.9082]) + + """ + is_differentiable = True + sum_squared_error: Tensor + sum_error: Tensor + residual: Tensor + total: Tensor + + def __init__( + self, + num_outputs: int = 1, + adjusted: int = 0, + multioutput: str = "uniform_average", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.num_outputs = num_outputs + + if adjusted < 0 or not isinstance(adjusted, int): + raise ValueError("`adjusted` parameter should be an integer larger or equal to 0.") + self.adjusted = adjusted + + allowed_multioutput = ("raw_values", "uniform_average", "variance_weighted") + if multioutput not in allowed_multioutput: + raise ValueError( + f"Invalid input to argument `multioutput`. Choose one of the following: {allowed_multioutput}" + ) + self.multioutput = multioutput + + self.add_state("sum_squared_error", default=B.zeros(self.num_outputs), dist_reduce_fx="sum") + self.add_state("sum_error", default=B.zeros(self.num_outputs), dist_reduce_fx="sum") + self.add_state("residual", default=B.zeros(self.num_outputs), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + sum_squared_error, sum_error, residual, total = _r2_score_update(preds, target) + + self.sum_squared_error += sum_squared_error + self.sum_error += sum_error + self.residual += residual + self.total += total + + def compute(self) -> Tensor: + """Computes r2 score over the metric states.""" + return _r2_score_compute( + self.sum_squared_error, self.sum_error, self.residual, self.total, self.adjusted, self.multioutput + ) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/spearman.py b/RE/paddlemetric/src/paddlemetrics/regression/spearman.py new file mode 100644 index 00000000..76249378 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/spearman.py @@ -0,0 +1,96 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
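The `adjusted` argument is a post-hoc correction of the plain score. Reproducing the first doctest by hand and then applying the adjustment formula for a single regressor (k = 1):

```python
target = [3.0, -0.5, 2.0, 7.0]
preds  = [2.5,  0.0, 2.0, 8.0]

ss_res = sum((t - p) ** 2 for t, p in zip(target, preds))
mean_t = sum(target) / len(target)
ss_tot = sum((t - mean_t) ** 2 for t in target)
r2 = 1 - ss_res / ss_tot                       # ~0.9486, matching the doctest

n, k = len(target), 1
print(1 - (1 - r2) * (n - 1) / (n - k - 1))    # ~0.9229, i.e. what R2Score(adjusted=1) would report
```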
+# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, List, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.regression.spearman import _spearman_corrcoef_compute, _spearman_corrcoef_update +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import rank_zero_warn +from paddlemetrics.utilities.data import dim_zero_cat + + +class SpearmanCorrcoef(Metric): + r""" + Computes `spearmans rank correlation coefficient`_. + + .. math: + r_s = = \frac{cov(rg_x, rg_y)}{\sigma_{rg_x} * \sigma_{rg_y}} + + where rg_x and rg_y are the rank associated to the variables x and y. Spearmans correlations coefficient + corresponds to the standard pearsons correlation coefficient calculated on the rank variables. + + Args: + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather + + Example: + >>> from paddlemetrics import SpearmanCorrcoef + >>> target = B.tensor([3, -0.5, 2, 7]) + >>> preds = B.tensor([2.5, 0.0, 2, 8]) + >>> spearman = SpearmanCorrcoef() + >>> spearman(preds, target) + tensor(1.0000) + + """ + is_differentiable = False + preds: List[Tensor] + target: List[Tensor] + + def __init__( + self, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable] = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + rank_zero_warn( + "Metric `SpearmanCorrcoef` will save all targets and predictions in the buffer." + " For large datasets, this may lead to large memory footprint." + ) + + self.add_state("preds", default=[], dist_reduce_fx="cat") + self.add_state("target", default=[], dist_reduce_fx="cat") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + preds, target = _spearman_corrcoef_update(preds, target) + self.preds.append(preds) + self.target.append(target) + + def compute(self) -> Tensor: + """Computes spearmans correlation coefficient.""" + preds = dim_zero_cat(self.preds) + target = dim_zero_cat(self.target) + return _spearman_corrcoef_compute(preds, target) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/symmetric_mean_absolute_percentage_error.py b/RE/paddlemetric/src/paddlemetrics/regression/symmetric_mean_absolute_percentage_error.py new file mode 100644 index 00000000..3e545e08 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/symmetric_mean_absolute_percentage_error.py @@ -0,0 +1,92 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
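Spearman's coefficient is simply Pearson's correlation computed on ranks. A plain-Python sketch for the docstring values (ties, which the functional helper resolves with average ranks, are ignored here):

```python
target = [3.0, -0.5, 2.0, 7.0]
preds  = [2.5,  0.0, 2.0, 8.0]

def ranks(xs):
    order = sorted(range(len(xs)), key=lambda i: xs[i])
    out = [0.0] * len(xs)
    for rank, i in enumerate(order, start=1):
        out[i] = float(rank)
    return out

def pearson(xs, ys):
    n = len(xs)
    mx, my = sum(xs) / n, sum(ys) / n
    cov = sum((x - mx) * (y - my) for x, y in zip(xs, ys))
    sx = sum((x - mx) ** 2 for x in xs) ** 0.5
    sy = sum((y - my) ** 2 for y in ys) ** 0.5
    return cov / (sx * sy)

print(ranks(preds), ranks(target))            # both [3.0, 1.0, 2.0, 4.0]
print(pearson(ranks(preds), ranks(target)))   # 1.0, matching the doctest
```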
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.regression.symmetric_mean_absolute_percentage_error import ( + _symmetric_mean_absolute_percentage_error_compute, + _symmetric_mean_absolute_percentage_error_update, +) +from paddlemetrics.metric import Metric + + +class SymmetricMeanAbsolutePercentageError(Metric): + r""" + Computes symmetric mean absolute percentage error (`SMAPE`_). + + .. math:: \text{SMAPE} = \frac{2}{n}\sum_1^n max(\frac{| y_i - \hat{y_i} |}{| y_i | + | \hat{y_i} |, \epsilon}) + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a tensor of predictions. + + Args: + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + + Note: + The epsilon value is taken from `scikit-learn's implementation of SMAPE`_. + + Note: + SMAPE output is a non-negative floating point between 0 and 1. Best result is 0.0 . + + + Example: + >>> from paddlemetrics import SymmetricMeanAbsolutePercentageError + >>> target = B.tensor([1, 10, 1e6]) + >>> preds = B.tensor([0.9, 15, 1.2e6]) + >>> smape = SymmetricMeanAbsolutePercentageError() + >>> smape(preds, target) + tensor(0.2290) + """ + is_differentiable = True + sum_abs_per_error: Tensor + total: Tensor + + def __init__( + self, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.add_state("sum_abs_per_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=tensor(0.0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + sum_abs_per_error, num_obs = _symmetric_mean_absolute_percentage_error_update(preds, target) + + self.sum_abs_per_error += sum_abs_per_error + self.total += num_obs + + def compute(self) -> Tensor: + """Computes mean absolute percentage error over state.""" + return _symmetric_mean_absolute_percentage_error_compute(self.sum_abs_per_error, self.total) diff --git a/RE/paddlemetric/src/paddlemetrics/regression/tweedie_deviance.py b/RE/paddlemetric/src/paddlemetrics/regression/tweedie_deviance.py new file mode 100644 index 00000000..4687bdd5 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/regression/tweedie_deviance.py @@ -0,0 +1,116 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
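The doctest above can be reproduced by hand; note that it is the denominator that gets clamped, i.e. SMAPE = (2 / n) * sum(|y - yhat| / max(|y| + |yhat|, eps)). The epsilon below is an assumed small constant; the exact value lives in the functional helper:

```python
eps = 1.17e-06   # assumed placeholder; see _symmetric_mean_absolute_percentage_error_update

target = [1.0, 10.0, 1e6]
preds  = [0.9, 15.0, 1.2e6]

terms = [abs(t - p) / max(abs(t) + abs(p), eps) for t, p in zip(target, preds)]
print(2 * sum(terms) / len(terms))   # ~0.2290, matching the doctest
```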
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional.regression.tweedie_deviance import ( + _tweedie_deviance_score_compute, + _tweedie_deviance_score_update, +) +from paddlemetrics.metric import Metric + + +class TweedieDevianceScore(Metric): + r""" + Computes the `Tweedie Deviance Score`_ between targets and predictions: + + .. math:: + deviance\_score(\hat{y},y) = + \begin{cases} + (\hat{y} - y)^2, & \text{for }power=0\\ + 2 * (y * log(\frac{y}{\hat{y}}) + \hat{y} - y), & \text{for }power=1\\ + 2 * (log(\frac{\hat{y}}{y}) + \frac{y}{\hat{y}} - 1), & \text{for }power=2\\ + 2 * (\frac{(max(y,0))^{2}}{(1 - power)(2 - power)} - \frac{y(\hat{y})^{1 - power}}{1 - power} + \frac{(\hat{y}) + ^{2 - power}}{2 - power}), & \text{otherwise} + \end{cases} + + where :math:`y` is a tensor of targets values, and :math:`\hat{y}` is a tensor of predictions. + + Forward accepts + + - ``preds`` (float tensor): ``(N,...)`` + - ``targets`` (float tensor): ``(N,...)`` + + Args: + power: + - power < 0 : Extreme stable distribution. (Requires: preds > 0.) + - power = 0 : Normal distribution. (Requires: targets and preds can be any real numbers.) + - power = 1 : Poisson distribution. (Requires: targets >= 0 and y_pred > 0.) + - 1 < p < 2 : Compound Poisson distribution. (Requires: targets >= 0 and preds > 0.) + - power = 2 : Gamma distribution. (Requires: targets > 0 and preds > 0.) + - power = 3 : Inverse Gaussian distribution. (Requires: targets > 0 and preds > 0.) + - otherwise : Positive stable distribution. (Requires: targets > 0 and preds > 0.) + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the all gather. 
+ + Example: + >>> from paddlemetrics import TweedieDevianceScore + >>> targets = B.tensor([1.0, 2.0, 3.0, 4.0]) + >>> preds = B.tensor([4.0, 3.0, 2.0, 1.0]) + >>> deviance_score = TweedieDevianceScore(power=2) + >>> deviance_score(preds, targets) + tensor(1.2083) + + """ + is_differentiable = True + sum_deviance_score: Tensor + num_observations: Tensor + + def __init__( + self, + power: float = 0.0, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + if 0 < power < 1: + raise ValueError(f"Deviance Score is not defined for power={power}.") + + self.power: float = power + + self.add_state("sum_deviance_score", B.tensor(0.0), dist_reduce_fx="sum") + self.add_state("num_observations", B.tensor(0), dist_reduce_fx="sum") + + def update(self, preds: Tensor, targets: Tensor) -> None: # type: ignore + """Update metric states with predictions and targets. + + Args: + preds: Predicted tensor with shape ``(N,d)`` + targets: Ground truth tensor with shape ``(N,d)`` + """ + sum_deviance_score, num_observations = _tweedie_deviance_score_update(preds, targets, self.power) + + self.sum_deviance_score += sum_deviance_score + self.num_observations += num_observations + + def compute(self) -> Tensor: + return _tweedie_deviance_score_compute(self.sum_deviance_score, self.num_observations) diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/__init__.py b/RE/paddlemetric/src/paddlemetrics/retrieval/__init__.py new file mode 100644 index 00000000..208a0224 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/__init__.py @@ -0,0 +1,22 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddlemetrics.retrieval.mean_average_precision import RetrievalMAP # noqa: F401 +from paddlemetrics.retrieval.mean_reciprocal_rank import RetrievalMRR # noqa: F401 +from paddlemetrics.retrieval.retrieval_fallout import RetrievalFallOut # noqa: F401 +from paddlemetrics.retrieval.retrieval_hit_rate import RetrievalHitRate # noqa: F401 +from paddlemetrics.retrieval.retrieval_metric import RetrievalMetric # noqa: F401 +from paddlemetrics.retrieval.retrieval_ndcg import RetrievalNormalizedDCG # noqa: F401 +from paddlemetrics.retrieval.retrieval_precision import RetrievalPrecision # noqa: F401 +from paddlemetrics.retrieval.retrieval_r_precision import RetrievalRPrecision # noqa: F401 +from paddlemetrics.retrieval.retrieval_recall import RetrievalRecall # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/mean_average_precision.py b/RE/paddlemetric/src/paddlemetrics/retrieval/mean_average_precision.py new file mode 100644 index 00000000..ee7f9065 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/mean_average_precision.py @@ -0,0 +1,70 @@ +# Copyright The PyTorch Lightning team. 
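Hand-checking the `power=2` (Gamma) branch of the formula above against the doctest; the metric reports the accumulated deviance divided by the number of observations:

```python
import math

targets = [1.0, 2.0, 3.0, 4.0]
preds   = [4.0, 3.0, 2.0, 1.0]

# Per-sample Gamma deviance: 2 * (log(p / t) + t / p - 1)
dev = [2 * (math.log(p / t) + t / p - 1) for t, p in zip(targets, preds)]
print(sum(dev) / len(dev))   # ~1.2083, matching the doctest
```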
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.retrieval.average_precision import retrieval_average_precision +from paddlemetrics.retrieval.retrieval_metric import RetrievalMetric + + +class RetrievalMAP(RetrievalMetric): + """Computes `Mean Average Precision`_. + + Works with binary target data. Accepts float predictions from a model output. + + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` + - ``target`` (long or bool tensor): ``(N, ...)`` + - ``indexes`` (long tensor): ``(N, ...)`` + + ``indexes``, ``preds`` and ``target`` must have the same dimension. + ``indexes`` indicate to which query a prediction belongs. + Predictions will be first grouped by ``indexes`` and then `MAP` will be computed as the mean + of the `Average Precisions` over each query. + + Args: + empty_target_action: + Specify what to do with queries that do not have at least a positive ``target``. Choose from: + + - ``'neg'``: those queries count as ``0.0`` (default) + - ``'pos'``: those queries count as ``1.0`` + - ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned + - ``'error'``: raise a ``ValueError`` + + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects + the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. default: None + + Example: + >>> from paddlemetrics import RetrievalMAP + >>> indexes = tensor([0, 0, 0, 1, 1, 1, 1]) + >>> preds = tensor([0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]) + >>> target = tensor([False, False, True, False, True, False, True]) + >>> rmap = RetrievalMAP() + >>> rmap(preds, target, indexes=indexes) + tensor(0.7917) + """ + + higher_is_better = True + + def _metric(self, preds: Tensor, target: Tensor) -> Tensor: + return retrieval_average_precision(preds, target) diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/mean_reciprocal_rank.py b/RE/paddlemetric/src/paddlemetrics/retrieval/mean_reciprocal_rank.py new file mode 100644 index 00000000..76f15bde --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/mean_reciprocal_rank.py @@ -0,0 +1,70 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
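How the doctest above decomposes: predictions are grouped per query via `indexes`, an Average Precision is computed inside each group, and the group scores are averaged. A plain-Python sketch:

```python
indexes = [0, 0, 0, 1, 1, 1, 1]
preds   = [0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]
target  = [False, False, True, False, True, False, True]

def average_precision(group):
    # Rank by score, then average the precision at each relevant position.
    ranked = sorted(group, key=lambda pt: pt[0], reverse=True)
    hits, precisions = 0, []
    for rank, (_, relevant) in enumerate(ranked, start=1):
        if relevant:
            hits += 1
            precisions.append(hits / rank)
    return sum(precisions) / len(precisions)

aps = []
for q in sorted(set(indexes)):
    group = [(p, t) for i, p, t in zip(indexes, preds, target) if i == q]
    aps.append(average_precision(group))

print(aps, sum(aps) / len(aps))   # [1.0, 0.5833...] -> ~0.7917, matching the doctest
```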
+# See the License for the specific language governing permissions and +# limitations under the License. +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.retrieval.reciprocal_rank import retrieval_reciprocal_rank +from paddlemetrics.retrieval.retrieval_metric import RetrievalMetric + + +class RetrievalMRR(RetrievalMetric): + """Computes `Mean Reciprocal Rank`_. + + Works with binary target data. Accepts float predictions from a model output. + + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` + - ``target`` (long or bool tensor): ``(N, ...)`` + - ``indexes`` (long tensor): ``(N, ...)`` + + ``indexes``, ``preds`` and ``target`` must have the same dimension. + ``indexes`` indicate to which query a prediction belongs. + Predictions will be first grouped by ``indexes`` and then `MRR` will be computed as the mean + of the `Reciprocal Rank` over each query. + + Args: + empty_target_action: + Specify what to do with queries that do not have at least a positive ``target``. Choose from: + + - ``'neg'``: those queries count as ``0.0`` (default) + - ``'pos'``: those queries count as ``1.0`` + - ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned + - ``'error'``: raise a ``ValueError`` + + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects + the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. default: None + + Example: + >>> from paddlemetrics import RetrievalMRR + >>> indexes = tensor([0, 0, 0, 1, 1, 1, 1]) + >>> preds = tensor([0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]) + >>> target = tensor([False, False, True, False, True, False, True]) + >>> mrr = RetrievalMRR() + >>> mrr(preds, target, indexes=indexes) + tensor(0.7500) + """ + + higher_is_better = True + + def _metric(self, preds: Tensor, target: Tensor) -> Tensor: + return retrieval_reciprocal_rank(preds, target) diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_fallout.py b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_fallout.py new file mode 100644 index 00000000..38b70f7c --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_fallout.py @@ -0,0 +1,131 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import Any, Callable, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.retrieval.fall_out import retrieval_fall_out +from paddlemetrics.retrieval.retrieval_metric import RetrievalMetric +from paddlemetrics.utilities.data import get_group_indexes + + +class RetrievalFallOut(RetrievalMetric): + """Computes `Fall-out`_. + + Works with binary target data. Accepts float predictions from a model output. + + Forward accepts: + + - ``preds`` (float tensor): ``(N, ...)`` + - ``target`` (long or bool tensor): ``(N, ...)`` + - ``indexes`` (long tensor): ``(N, ...)`` + + ``indexes``, ``preds`` and ``target`` must have the same dimension. + ``indexes`` indicate to which query a prediction belongs. + Predictions will be first grouped by ``indexes`` and then `Fall-out` will be computed as the mean + of the `Fall-out` over each query. + + Args: + empty_target_action: + Specify what to do with queries that do not have at least a negative ``target``. Choose from: + + - ``'neg'``: those queries count as ``0.0`` (default) + - ``'pos'``: those queries count as ``1.0`` + - ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned + - ``'error'``: raise a ``ValueError`` + + k: consider only the top k elements for each query (default: None, which considers them all) + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects + the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. default: None + + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> from paddlemetrics import RetrievalFallOut + >>> indexes = tensor([0, 0, 0, 1, 1, 1, 1]) + >>> preds = tensor([0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]) + >>> target = tensor([False, False, True, False, True, False, True]) + >>> fo = RetrievalFallOut(k=2) + >>> fo(preds, target, indexes=indexes) + tensor(0.5000) + """ + + higher_is_better = False + + def __init__( + self, + empty_target_action: str = "pos", + k: int = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + empty_target_action=empty_target_action, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + if (k is not None) and not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + self.k = k + + def compute(self) -> Tensor: + """First concat state `indexes`, `preds` and `target` since they were stored as lists. + + After that, compute list of groups that will help in keeping together predictions about the same query. Finally, + for each group compute the `_metric` if the number of negative targets is at least 1, otherwise behave as + specified by `self.empty_target_action`. 
+ """ + indexes = B.cat(self.indexes, dim=0) + preds = B.cat(self.preds, dim=0) + target = B.cat(self.target, dim=0) + + res = [] + groups = get_group_indexes(indexes) + + for group in groups: + mini_preds = preds[group] + mini_target = target[group] + + if not (1 - mini_target).sum(): + if self.empty_target_action == "error": + raise ValueError("`compute` method was provided with a query with no negative target.") + if self.empty_target_action == "pos": + res.append(tensor(1.0)) + elif self.empty_target_action == "neg": + res.append(tensor(0.0)) + else: + # ensure list containt only float tensors + res.append(self._metric(mini_preds, mini_target)) + + return B.stack([x.to(preds) for x in res]).mean() if res else tensor(0.0).to(preds) + + def _metric(self, preds: Tensor, target: Tensor) -> Tensor: + return retrieval_fall_out(preds, target, k=self.k) diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_hit_rate.py b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_hit_rate.py new file mode 100644 index 00000000..6a053b7b --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_hit_rate.py @@ -0,0 +1,98 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.retrieval.hit_rate import retrieval_hit_rate +from paddlemetrics.retrieval.retrieval_metric import RetrievalMetric + + +class RetrievalHitRate(RetrievalMetric): + """Computes `IR HitRate`. + + Works with binary target data. Accepts float predictions from a model output. + + Forward accepts: + + - ``preds`` (float tensor): ``(N, ...)`` + - ``target`` (long or bool tensor): ``(N, ...)`` + - ``indexes`` (long tensor): ``(N, ...)`` + + ``indexes``, ``preds`` and ``target`` must have the same dimension. + ``indexes`` indicate to which query a prediction belongs. + Predictions will be first grouped by ``indexes`` and then the `Hit Rate` will be computed as the mean + of the `Hit Rate` over each query. + + Args: + empty_target_action: + Specify what to do with queries that do not have at least a positive ``target``. Choose from: + + - ``'neg'``: those queries count as ``0.0`` (default) + - ``'pos'``: those queries count as ``1.0`` + - ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned + - ``'error'``: raise a ``ValueError`` + + k: consider only the top k elements for each query (default: None, which considers them all) + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects + the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. 
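Fall-out is the mirror image of recall: the share of non-relevant documents that end up in the top-k of each query, which is why `empty_target_action` defaults to `'pos'` here and the guard in `compute()` checks for at least one negative target. Reproducing the doctest by hand:

```python
indexes = [0, 0, 0, 1, 1, 1, 1]
preds   = [0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]
target  = [False, False, True, False, True, False, True]
k = 2

scores = []
for q in sorted(set(indexes)):
    group = [(p, t) for i, p, t in zip(indexes, preds, target) if i == q]
    negatives = sum(1 for _, t in group if not t)
    topk = sorted(group, key=lambda pt: pt[0], reverse=True)[:k]
    scores.append(sum(1 for _, t in topk if not t) / negatives)

print(scores, sum(scores) / len(scores))   # [0.5, 0.5] -> 0.5, as in the doctest
```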
When `None`, DDP + will be used to perform the allgather. default: None + + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> from paddlemetrics import RetrievalHitRate + >>> indexes = tensor([0, 0, 0, 1, 1, 1, 1]) + >>> preds = tensor([0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]) + >>> target = tensor([True, False, False, False, True, False, True]) + >>> hr2 = RetrievalHitRate(k=2) + >>> hr2(preds, target, indexes=indexes) + tensor(0.5000) + """ + + higher_is_better = True + + def __init__( + self, + empty_target_action: str = "neg", + k: int = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + empty_target_action=empty_target_action, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + if (k is not None) and not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + self.k = k + + def _metric(self, preds: Tensor, target: Tensor) -> Tensor: + return retrieval_hit_rate(preds, target, k=self.k) diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_metric.py b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_metric.py new file mode 100644 index 00000000..ab43876f --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_metric.py @@ -0,0 +1,147 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from abc import ABC, abstractmethod +from typing import Any, Callable, List, Optional + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics import Metric +from paddlemetrics.utilities.checks import _check_retrieval_inputs +from paddlemetrics.utilities.data import get_group_indexes + +#: get_group_indexes is used to group predictions belonging to the same document + + +class RetrievalMetric(Metric): + """Works with binary target data. Accepts float predictions from a model output. + + Forward accepts + + - ``preds`` (float tensor): ``(N, ...)`` + - ``target`` (long or bool tensor): ``(N, ...)`` + - ``indexes`` (long tensor): ``(N, ...)`` + + `indexes`, `preds` and `target` must have the same dimension and will be flatten + to single dimension once provided. + + `indexes` indicate to which query a prediction belongs. + Predictions will be first grouped by indexes. Then the + real metric, defined by overriding the `_metric` method, + will be computed as the mean of the scores over each query. + + Args: + empty_target_action: + Specify what to do with queries that do not have at least a positive + or negative (depend on metric) target. 
Choose from: + + - ``'neg'``: those queries count as ``0.0`` (default) + - ``'pos'``: those queries count as ``1.0`` + - ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned + - ``'error'``: raise a ``ValueError`` + + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects + the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. default: None + """ + + indexes: List[Tensor] + preds: List[Tensor] + target: List[Tensor] + higher_is_better = True + + def __init__( + self, + empty_target_action: str = "neg", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + self.allow_non_binary_target = False + + empty_target_action_options = ("error", "skip", "neg", "pos") + if empty_target_action not in empty_target_action_options: + raise ValueError(f"Argument `empty_target_action` received a wrong value `{empty_target_action}`.") + + self.empty_target_action = empty_target_action + + self.add_state("indexes", default=[], dist_reduce_fx=None) + self.add_state("preds", default=[], dist_reduce_fx=None) + self.add_state("target", default=[], dist_reduce_fx=None) + + def update(self, preds: Tensor, target: Tensor, indexes: Tensor) -> None: # type: ignore + """Check shape, check and convert dtypes, flatten and add to accumulators.""" + if indexes is None: + raise ValueError("Argument `indexes` cannot be None") + + indexes, preds, target = _check_retrieval_inputs( + indexes, preds, target, allow_non_binary_target=self.allow_non_binary_target + ) + + self.indexes.append(indexes) + self.preds.append(preds) + self.target.append(target) + + def compute(self) -> Tensor: + """First concat state ``indexes``, ``preds`` and ``target`` since they were stored as lists. + + After that, compute list of groups that will help in keeping together predictions about the same query. Finally, + for each group compute the ``_metric`` if the number of positive targets is at least 1, otherwise behave as + specified by ``self.empty_target_action``. + """ + indexes = B.cat(self.indexes, dim=0) + preds = B.cat(self.preds, dim=0) + target = B.cat(self.target, dim=0) + + res = [] + groups = get_group_indexes(indexes) + + for group in groups: + mini_preds = preds[group] + mini_target = target[group] + + if not mini_target.sum(): + if self.empty_target_action == "error": + raise ValueError("`compute` method was provided with a query with no positive target.") + if self.empty_target_action == "pos": + res.append(tensor(1.0)) + elif self.empty_target_action == "neg": + res.append(tensor(0.0)) + else: + # ensure list contains only float tensors + res.append(self._metric(mini_preds, mini_target)) + + return B.stack([x.to(preds) for x in res]).mean() if res else tensor(0.0).to(preds) + + @abstractmethod + def _metric(self, preds: Tensor, target: Tensor) -> Tensor: + """Compute a metric over a predictions and target of a single group. 
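+        Concrete subclasses typically just delegate to the matching functional implementation, for
+        example ``return retrieval_precision(preds, target, k=self.k)`` in ``RetrievalPrecision``, and may
+        assume ``preds`` and ``target`` hold the already-flattened predictions and targets of one group.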
+ + This method should be overridden by subclasses. + """ diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_ndcg.py b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_ndcg.py new file mode 100644 index 00000000..bb0740ca --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_ndcg.py @@ -0,0 +1,99 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.retrieval.ndcg import retrieval_normalized_dcg +from paddlemetrics.retrieval.retrieval_metric import RetrievalMetric + + +class RetrievalNormalizedDCG(RetrievalMetric): + """Computes `Normalized Discounted Cumulative Gain`_. + + Works with binary or positive integer target data. Accepts float predictions from a model output. + + Forward accepts: + + - ``preds`` (float tensor): ``(N, ...)`` + - ``target`` (long, int, bool or float tensor): ``(N, ...)`` + - ``indexes`` (long tensor): ``(N, ...)`` + + ``indexes``, ``preds`` and ``target`` must have the same dimension. + ``indexes`` indicate to which query a prediction belongs. + Predictions will be first grouped by ``indexes`` and then `Normalized Discounted Cumulative Gain` + will be computed as the mean of the `Normalized Discounted Cumulative Gain` over each query. + + Args: + empty_target_action: + Specify what to do with queries that do not have at least a positive ``target``. Choose from: + + - ``'neg'``: those queries count as ``0.0`` (default) + - ``'pos'``: those queries count as ``1.0`` + - ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned + - ``'error'``: raise a ``ValueError`` + + k: consider only the top k elements for each query (default: None, which considers them all) + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects + the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. 
default: None + + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> from paddlemetrics import RetrievalNormalizedDCG + >>> indexes = tensor([0, 0, 0, 1, 1, 1, 1]) + >>> preds = tensor([0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]) + >>> target = tensor([False, False, True, False, True, False, True]) + >>> ndcg = RetrievalNormalizedDCG() + >>> ndcg(preds, target, indexes=indexes) + tensor(0.8467) + """ + + higher_is_better = True + + def __init__( + self, + empty_target_action: str = "neg", + k: int = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + empty_target_action=empty_target_action, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + if (k is not None) and not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + self.k = k + self.allow_non_binary_target = True + + def _metric(self, preds: Tensor, target: Tensor) -> Tensor: + return retrieval_normalized_dcg(preds, target, k=self.k) diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_precision.py b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_precision.py new file mode 100644 index 00000000..f0f983a8 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_precision.py @@ -0,0 +1,98 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.retrieval.precision import retrieval_precision +from paddlemetrics.retrieval.retrieval_metric import RetrievalMetric + + +class RetrievalPrecision(RetrievalMetric): + """Computes `IR Precision`_. + + Works with binary target data. Accepts float predictions from a model output. + + Forward accepts: + + - ``preds`` (float tensor): ``(N, ...)`` + - ``target`` (long or bool tensor): ``(N, ...)`` + - ``indexes`` (long tensor): ``(N, ...)`` + + ``indexes``, ``preds`` and ``target`` must have the same dimension. + ``indexes`` indicate to which query a prediction belongs. + Predictions will be first grouped by ``indexes`` and then `Precision` will be computed as the mean + of the `Precision` over each query. + + Args: + empty_target_action: + Specify what to do with queries that do not have at least a positive ``target``. Choose from: + + - ``'neg'``: those queries count as ``0.0`` (default) + - ``'pos'``: those queries count as ``1.0`` + - ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned + - ``'error'``: raise a ``ValueError`` + + k: consider only the top k elements for each query (default: None, which considers them all) + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. 
default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects + the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. default: None + + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> from paddlemetrics import RetrievalPrecision + >>> indexes = tensor([0, 0, 0, 1, 1, 1, 1]) + >>> preds = tensor([0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]) + >>> target = tensor([False, False, True, False, True, False, True]) + >>> p2 = RetrievalPrecision(k=2) + >>> p2(preds, target, indexes=indexes) + tensor(0.5000) + """ + + higher_is_better = True + + def __init__( + self, + empty_target_action: str = "neg", + k: int = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + empty_target_action=empty_target_action, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + if (k is not None) and not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + self.k = k + + def _metric(self, preds: Tensor, target: Tensor) -> Tensor: + return retrieval_precision(preds, target, k=self.k) diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_r_precision.py b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_r_precision.py new file mode 100644 index 00000000..75373532 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_r_precision.py @@ -0,0 +1,70 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.retrieval.r_precision import retrieval_r_precision +from paddlemetrics.retrieval.retrieval_metric import RetrievalMetric + + +class RetrievalRPrecision(RetrievalMetric): + """Computes `IR R-Precision`_. + + Works with binary target data. Accepts float predictions from a model output. + + Forward accepts: + + - ``preds`` (float tensor): ``(N, ...)`` + - ``target`` (long or bool tensor): ``(N, ...)`` + - ``indexes`` (long tensor): ``(N, ...)`` + + ``indexes``, ``preds`` and ``target`` must have the same dimension. + ``indexes`` indicate to which query a prediction belongs. + Predictions will be first grouped by ``indexes`` and then `R-Precision` will be computed as the mean + of the `R-Precision` over each query. + + Args: + empty_target_action: + Specify what to do with queries that do not have at least a positive ``target``. 
Choose from: + + - ``'neg'``: those queries count as ``0.0`` (default) + - ``'pos'``: those queries count as ``1.0`` + - ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned + - ``'error'``: raise a ``ValueError`` + + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects + the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. default: None + + Example: + >>> from paddlemetrics import RetrievalRPrecision + >>> indexes = tensor([0, 0, 0, 1, 1, 1, 1]) + >>> preds = tensor([0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]) + >>> target = tensor([False, False, True, False, True, False, True]) + >>> p2 = RetrievalRPrecision() + >>> p2(preds, target, indexes=indexes) + tensor(0.7500) + """ + + higher_is_better = True + + def _metric(self, preds: Tensor, target: Tensor) -> Tensor: + return retrieval_r_precision(preds, target) diff --git a/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_recall.py b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_recall.py new file mode 100644 index 00000000..26ace51c --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/retrieval/retrieval_recall.py @@ -0,0 +1,98 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, Optional + +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.retrieval.recall import retrieval_recall +from paddlemetrics.retrieval.retrieval_metric import RetrievalMetric + + +class RetrievalRecall(RetrievalMetric): + """Computes `IR Recall`_. + + Works with binary target data. Accepts float predictions from a model output. + + Forward accepts: + + - ``preds`` (float tensor): ``(N, ...)`` + - ``target`` (long or bool tensor): ``(N, ...)`` + - ``indexes`` (long tensor): ``(N, ...)`` + + ``indexes``, ``preds`` and ``target`` must have the same dimension. + ``indexes`` indicate to which query a prediction belongs. + Predictions will be first grouped by ``indexes`` and then `Recall` will be computed as the mean + of the `Recall` over each query. + + Args: + empty_target_action: + Specify what to do with queries that do not have at least a positive ``target``. Choose from: + + - ``'neg'``: those queries count as ``0.0`` (default) + - ``'pos'``: those queries count as ``1.0`` + - ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned + - ``'error'``: raise a ``ValueError`` + + k: consider only the top k elements for each query (default: None, which considers them all) + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. 
default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects + the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. default: None + + Raises: + ValueError: + If ``k`` parameter is not `None` or an integer larger than 0 + + Example: + >>> from paddlemetrics import RetrievalRecall + >>> indexes = tensor([0, 0, 0, 1, 1, 1, 1]) + >>> preds = tensor([0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2]) + >>> target = tensor([False, False, True, False, True, False, True]) + >>> r2 = RetrievalRecall(k=2) + >>> r2(preds, target, indexes=indexes) + tensor(0.7500) + """ + + higher_is_better = True + + def __init__( + self, + empty_target_action: str = "neg", + k: int = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + super().__init__( + empty_target_action=empty_target_action, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + if (k is not None) and not (isinstance(k, int) and k > 0): + raise ValueError("`k` has to be a positive integer or None") + self.k = k + + def _metric(self, preds: Tensor, target: Tensor) -> Tensor: + return retrieval_recall(preds, target, k=self.k) diff --git a/RE/paddlemetric/src/paddlemetrics/setup_tools.py b/RE/paddlemetric/src/paddlemetrics/setup_tools.py new file mode 100644 index 00000000..e3233cef --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/setup_tools.py @@ -0,0 +1,74 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import re +from typing import List + +_PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__)) + + +def _load_requirements(path_dir: str, file_name: str = "requirements.txt", comment_char: str = "#") -> List[str]: + """Load requirements from a file. + + >>> _load_requirements(_PROJECT_ROOT) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + ['numpy...', 'B...'] + """ + with open(os.path.join(path_dir, file_name)) as file: + lines = [ln.strip() for ln in file.readlines()] + reqs = [] + for ln in lines: + # filer all comments + if comment_char in ln: + ln = ln[: ln.index(comment_char)].strip() + # skip directly installed dependencies + if ln.startswith("http"): + continue + if ln: # if requirement is not empty + reqs.append(ln) + return reqs + + +def _load_readme_description(path_dir: str, homepage: str, version: str) -> str: + """Load readme as decribtion. + + >>> _load_readme_description(_PROJECT_ROOT, "", "") # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + '
...' + """ + path_readme = os.path.join(path_dir, "README.md") + with open(path_readme, encoding="utf-8") as fp: + text = fp.read() + + # https://github.com/PyTorchLightning/paddlemetrics/raw/master/docs/source/_static/images/lightning_module/pt_to_pl.png + github_source_url = os.path.join(homepage, "raw", version) + # replace relative repository path to absolute link to the release + # do not replace all "docs" as in the readme we reger some other sources with particular path to docs + text = text.replace("docs/source/_static/", f"{os.path.join(github_source_url, 'docs/source/_static/')}") + + # readthedocs badge + text = text.replace("badge/?version=stable", f"badge/?version={version}") + text = text.replace("paddlemetrics.readthedocs.io/en/stable/", f"paddlemetrics.readthedocs.io/en/{version}") + # codecov badge + text = text.replace("/branch/master/graph/badge.svg", f"/release/{version}/graph/badge.svg") + # replace github badges for release ones + text = text.replace("badge.svg?branch=master&event=push", f"badge.svg?tag={version}") + # Azure... + text = text.replace("?branchName=master", f"?branchName=refs%2Ftags%2F{version}") + text = re.sub(r"\?definitionId=\d+&branchName=master", f"?definitionId=2&branchName=refs%2Ftags%2F{version}", text) + + skip_begin = r"" + skip_end = r"" + # todo: wrap content as commented description + text = re.sub(rf"{skip_begin}.+?{skip_end}", "", text, flags=re.IGNORECASE + re.DOTALL) + + return text diff --git a/RE/paddlemetric/src/paddlemetrics/text/__init__.py b/RE/paddlemetric/src/paddlemetrics/text/__init__.py new file mode 100644 index 00000000..782ca295 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/text/__init__.py @@ -0,0 +1,18 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#from paddlemetrics.text.bert import BERTScore # noqa: F401 +from paddlemetrics.text.bleu import BLEUScore # noqa: F401 +from paddlemetrics.text.rouge import ROUGEScore # noqa: F401 +from paddlemetrics.text.sacre_bleu import SacreBLEUScore # noqa: F401 +from paddlemetrics.text.wer import WER # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/text/bert.py b/RE/paddlemetric/src/paddlemetrics/text/bert.py new file mode 100644 index 00000000..0f602f30 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/text/bert.py @@ -0,0 +1,251 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
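Before the individual text-metric implementations that follow, a minimal usage sketch of the metrics re-exported by `paddlemetrics/text/__init__.py` (illustrative only, not part of this diff; it assumes `paddlemetrics` is importable and reuses the values from the doctests below):

```python
# Illustrative sketch: the text metrics share the stateful Metric interface used by the
# retrieval metrics above -- update() accumulates batches, compute() aggregates them.
from paddlemetrics.text import BLEUScore, WER

bleu = BLEUScore(n_gram=4)
reference_corpus = [["there is a cat on the mat".split(), "a cat is on the mat".split()]]
translate_corpus = ["the cat is on the mat".split()]
bleu.update(reference_corpus, translate_corpus)  # note the order: references first, translations second
print(bleu.compute())                            # tensor(0.7598), as in the BLEUScore doctest

wer = WER()
wer.update(["this is the prediction"], ["this is the reference"])  # WER takes predictions first
print(wer.compute())                             # 1 substitution over 4 reference words -> tensor(0.2500)
```

`ROUGEScore` is exported from the same package but additionally requires `nltk` when stemming or `rougeLsum` keys are used.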
+import warnings +from typing import Any, Callable, Dict, List, Optional, Union + +import paddleext.torchapi as B + +from paddlemetrics.functional import bert_score +from paddlemetrics.functional.text.bert import _preprocess_text +from paddlemetrics.metric import Metric +from paddlemetrics.utilities.imports import _TRANSFORMERS_AVAILABLE + +if _TRANSFORMERS_AVAILABLE: + from transformers import AutoTokenizer + + +# Default model recommended in the original implementation. +_DEFAULT_MODEL = "roberta-large" + + +def _concatenate(d: Dict[str, List[B.Tensor]]) -> Dict[str, B.Tensor]: + """Concatenate list of tensors within a given dictionary.""" + output_dict: Dict[str, B.Tensor] = {} + for k, v in d.items(): + output_dict[k] = B.cat(v) + return output_dict + + +class BERTScore(Metric): + """`Bert_score Evaluating Text Generation`_ leverages the pre-trained contextual embeddings from BERT and + matches words in candidate and reference sentences by cosine similarity. It has been shown to correlate with + human judgment on sentence-level and system-level evaluation. Moreover, BERTScore computes precision, recall, + and F1 measure, which can be useful for evaluating different language generation tasks. + + This implemenation follows the original implementation from `BERT_score`_. + + Args: + predictions: + An iterable of predicted sentences. + references: + An iterable of target sentences. + model_type: + A name or a model path used to load `transformers` pretrained model. + num_layers: + A layer of representation to use. + all_layers: + An indication of whether the representation from all model's layers should be used. + If `all_layers = True`, the argument `num_layers` is ignored. + model: + A user's own model. Must be of `B.nn.Module` instance. + user_tokenizer: + A user's own tokenizer used with the own model. This must be an instance with the `__call__` method. + This method must take an iterable of sentences (`List[str]`) and must return a python dictionary + containing `"input_ids"` and `"attention_mask"` represented by `B.Tensor`. It is up to the user's model + of whether `"input_ids"` is a `B.Tensor` of input ids or embedding vectors. + This tokenizer must prepend an equivalent of `[CLS]` token and append an equivalent of `[SEP]` token + as `transformers` tokenizer does. + user_forward_fn: + A user's own forward function used in a combination with `user_model`. This function must take `user_model` + and a python dictionary of containing `"input_ids"` and `"attention_mask"` represented by `B.Tensor` + as an input and return the model's output represented by the single `B.Tensor`. + verbose: + An indication of whether a progress bar to be displayed during the embeddings calculation. + idf: + An indication whether normalization using inverse document frequencies should be used. + device: + A device to be used for calculation. + max_length: + A maximum length of input sequences. Sequences longer than `max_length` are to be trimmed. + batch_size: + A batch size used for model processing. + num_threads: + A number of threads to use for a dataloader. + return_hash: + An indication of whether the correspodning `hash_code` should be returned. + lang: + A language of input sentences. + rescale_with_baseline: + An indication of whether bertscore should be rescaled with a pre-computed baseline. + When a pretrained model from `transformers` model is used, the corresponding baseline is downloaded + from the original `bert-score` package from `BERT_score`_ if available. 
+ In other cases, please specify a path to the baseline csv/tsv file, which must follow the formatting + of the files from `BERT_score`_. + baseline_path: + A path to the user's own local csv/tsv file with the baseline scale. + baseline_url: + A url path to the user's own csv/tsv file with the baseline scale. + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather + + Returns: + Python dictionary containing the keys `precision`, `recall` and `f1` with corresponding values. + + Example: + >>> predictions = ["hello there", "general kenobi"] + >>> references = ["hello there", "master kenobi"] + >>> bertscore = BERTScore() + >>> bertscore.update(predictions=predictions,references=references) + >>> bertscore.compute() # doctest: +SKIP + {'precision': [0.99..., 0.99...], + 'recall': [0.99..., 0.99...], + 'f1': [0.99..., 0.99...]} + """ + + higher_is_better = True + + def __init__( + self, + model_name_or_path: Optional[str] = None, + num_layers: Optional[int] = None, + all_layers: bool = False, + model: Optional[B.nn.Module] = None, + user_tokenizer: Optional[Any] = None, + user_forward_fn: Callable[[B.nn.Module, Dict[str, B.Tensor]], B.Tensor] = None, + verbose: bool = False, + idf: bool = False, + device: Optional[Union[str, B.device]] = None, + max_length: int = 512, + batch_size: int = 64, + num_threads: int = 4, + return_hash: bool = False, + lang: str = "en", + rescale_with_baseline: bool = False, + baseline_path: Optional[str] = None, + baseline_url: Optional[str] = None, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ): + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + self.model_name_or_path = model_name_or_path + self.num_layers = num_layers + self.all_layers = all_layers + self.model = model + self.user_forward_fn = user_forward_fn + self.verbose = verbose + self.idf = idf + self.embedding_device = device + self.max_length = max_length + self.batch_size = batch_size + self.num_threads = num_threads + self.return_hash = return_hash + self.lang = lang + self.rescale_with_baseline = rescale_with_baseline + self.baseline_path = baseline_path + self.baseline_url = baseline_url + self.predictions: Dict[str, List[B.Tensor]] = {"input_ids": [], "attention_mask": []} + self.references: Dict[str, List[B.Tensor]] = {"input_ids": [], "attention_mask": []} + + if user_tokenizer: + self.tokenizer = user_tokenizer + self.user_tokenizer = True + else: + if not _TRANSFORMERS_AVAILABLE: + raise ValueError( + "`BERTScore` metric with default tokenizers requires `transformers` package be installed. " + "Either install with `pip install transformers>=4.0` or `pip install paddlemetrics[text]`" + ) + if not model_name_or_path: + model_name_or_path = _DEFAULT_MODEL + warnings.warn( + "The argument `model_name_or_path` was not specified while it is required when default " + " `transformers` model are used." 
+ f"It is, therefore, used the default recommended model - {_DEFAULT_MODEL}." + ) + self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) + self.user_tokenizer = False + + def update(self, predictions: List[str], references: List[str]) -> None: # type: ignore + """Store predictions/references for computing BERT scores. It is necessary to store sentences in a + tokenized form to ensure the DDP mode working. + + Args: + predictions: + An iterable of predicted sentences. + references: + An iterable of predicted sentences. + """ + predictions_dict = _preprocess_text( + predictions, + self.tokenizer, + self.max_length, + truncation=False, + sort_according_length=False, + own_tokenizer=self.user_tokenizer, + ) + references_dict = _preprocess_text( + references, + self.tokenizer, + self.max_length, + truncation=False, + sort_according_length=False, + own_tokenizer=self.user_tokenizer, + ) + self.predictions["input_ids"].append(predictions_dict["input_ids"]) + self.predictions["attention_mask"].append(predictions_dict["attention_mask"]) + self.references["input_ids"].append(references_dict["input_ids"]) + self.references["attention_mask"].append(references_dict["attention_mask"]) + + def compute(self) -> Dict[str, Union[List[float], str]]: + """Calculate BERT scores. + + Return: + Python dictionary containing the keys `precision`, `recall` and `f1` with corresponding values. + """ + return bert_score( + predictions=_concatenate(self.predictions), + references=_concatenate(self.references), + model_name_or_path=self.model_name_or_path, + num_layers=self.num_layers, + all_layers=self.all_layers, + model=self.model, + user_tokenizer=self.tokenizer if self.user_tokenizer else None, + user_forward_fn=self.user_forward_fn, + verbose=self.verbose, + idf=self.idf, + device=self.embedding_device, + max_length=self.max_length, + batch_size=self.batch_size, + num_threads=self.num_threads, + return_hash=self.return_hash, + lang=self.lang, + rescale_with_baseline=self.rescale_with_baseline, + baseline_path=self.baseline_path, + baseline_url=self.baseline_url, + ) diff --git a/RE/paddlemetric/src/paddlemetrics/text/bleu.py b/RE/paddlemetric/src/paddlemetrics/text/bleu.py new file mode 100644 index 00000000..46937d98 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/text/bleu.py @@ -0,0 +1,120 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# referenced from +# Library Name: torchtext +# Authors: torchtext authors and @sluks +# Date: 2020-07-18 +# Link: https://pyB.org/text/_modules/torchtext/data/metrics.html#bleu_score +from typing import Any, Callable, Optional, Sequence + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics import Metric +from paddlemetrics.functional.text.bleu import _bleu_score_compute, _bleu_score_update + + +class BLEUScore(Metric): + """Calculate `BLEU score`_ of machine translated text with one or more references. 
+ + Args: + n_gram: + Gram value ranged from 1 to 4 (Default 4) + smooth: + Whether or not to apply smoothing – see [2] + compute_on_step: + Forward only calls ``update()`` and returns None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. + + Example: + >>> translate_corpus = ['the cat is on the mat'.split()] + >>> reference_corpus = [['there is a cat on the mat'.split(), 'a cat is on the mat'.split()]] + >>> metric = BLEUScore() + >>> metric(reference_corpus, translate_corpus) + tensor(0.7598) + + References: + [1] BLEU: a Method for Automatic Evaluation of Machine Translation by Papineni, + Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu `BLEU`_ + + [2] Automatic Evaluation of Machine Translation Quality Using Longest Common Subsequence + and Skip-Bigram Statistics by Chin-Yew Lin and Franz Josef Och `Machine Translation Evolution`_ + """ + + is_differentiable = False + higher_is_better = True + trans_len: Tensor + ref_len: Tensor + numerator: Tensor + denominator: Tensor + + def __init__( + self, + n_gram: int = 4, + smooth: bool = False, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable] = None, + ): + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + + self.n_gram = n_gram + self.smooth = smooth + + self.add_state("trans_len", tensor(0, dtype=B.float), dist_reduce_fx="sum") + self.add_state("ref_len", tensor(0, dtype=B.float), dist_reduce_fx="sum") + self.add_state("numerator", B.zeros(self.n_gram), dist_reduce_fx="sum") + self.add_state("denominator", B.zeros(self.n_gram), dist_reduce_fx="sum") + + def update( # type: ignore + self, reference_corpus: Sequence[Sequence[Sequence[str]]], translate_corpus: Sequence[Sequence[str]] + ) -> None: + """Compute Precision Scores. + + Args: + reference_corpus: An iterable of iterables of reference corpus + translate_corpus: An iterable of machine translated corpus + """ + self.trans_len, self.ref_len = _bleu_score_update( + reference_corpus, + translate_corpus, + self.numerator, + self.denominator, + self.trans_len, + self.ref_len, + self.n_gram, + ) + + def compute(self) -> Tensor: + """Calculate BLEU score. + + Return: + Tensor with BLEU Score + """ + return _bleu_score_compute( + self.trans_len, self.ref_len, self.numerator, self.denominator, self.n_gram, self.smooth + ) diff --git a/RE/paddlemetric/src/paddlemetrics/text/rouge.py b/RE/paddlemetric/src/paddlemetrics/text/rouge.py new file mode 100644 index 00000000..254f366d --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/text/rouge.py @@ -0,0 +1,171 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import warnings +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +from paddleext.torchapi import Tensor + +from paddlemetrics import Metric +from paddlemetrics.functional.text.rouge import ALLOWED_ROUGE_KEYS, _rouge_score_compute, _rouge_score_update +from paddlemetrics.utilities.imports import _NLTK_AVAILABLE + + +class ROUGEScore(Metric): + """`Calculate Rouge Score`_, used for automatic summarization. This implementation should imitate the behaviour + of the `rouge-score` package `Python ROUGE Implementation` + + Args: + newline_sep: + New line separate the inputs. + This argument has not been in use any more. It is deprecated in v0.6 and will be removed in v0.7. + use_stemmer: + Use Porter stemmer to strip word suffixes to improve matching. + rouge_keys: + A list of rouge types to calculate. + Keys that are allowed are ``rougeL``, ``rougeLsum``, and ``rouge1`` through ``rouge9``. + decimal_places: + The number of digits to round the computed the values to. + This argument has not been in usd any more. It is deprecated in v0.6 and will be removed in v0.7. + compute_on_step: + Forward only calls ``update()`` and returns None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. + + Example: + + >>> targets = "Is your name John".split() + >>> preds = "My name is John".split() + >>> rouge = ROUGEScore() # doctest: +SKIP + >>> from pprint import pprint + >>> pprint(rouge(preds, targets)) # doctest: +NORMALIZE_WHITESPACE +SKIP + {'rouge1_fmeasure': 0.25, + 'rouge1_precision': 0.25, + 'rouge1_recall': 0.25, + 'rouge2_fmeasure': 0.0, + 'rouge2_precision': 0.0, + 'rouge2_recall': 0.0, + 'rougeL_fmeasure': 0.25, + 'rougeL_precision': 0.25, + 'rougeL_recall': 0.25, + 'rougeLsum_fmeasure': 0.25, + 'rougeLsum_precision': 0.25, + 'rougeLsum_recall': 0.25} + + Raises: + ValueError: + If the python packages ``nltk`` is not installed. + ValueError: + If any of the ``rouge_keys`` does not belong to the allowed set of keys. 
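+
+    Note:
+        ``preds`` and ``targets`` given to ``update`` may each be a single string or a list of strings;
+        a single string is wrapped into a one-element list before scoring.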
+ + References: + [1] ROUGE: A Package for Automatic Evaluation of Summaries by Chin-Yew Lin `Rouge Detail`_ + """ + + higher_is_better = True + + def __init__( + self, + newline_sep: Optional[bool] = None, # remove in v0.7 + use_stemmer: bool = False, + rouge_keys: Union[str, Tuple[str, ...]] = ("rouge1", "rouge2", "rougeL", "rougeLsum"), # type: ignore + decimal_places: Optional[bool] = None, # remove in v0.7 + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable] = None, + ): + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + if newline_sep is not None: + warnings.warn("Argument `newline_sep` is deprecated in v0.6 and will be removed in v0.7") + if decimal_places is not None: + warnings.warn("Argument `decimal_places` is deprecated in v0.6 and will be removed in v0.7") + + if use_stemmer or "rougeLsum" in rouge_keys: + if not _NLTK_AVAILABLE: + raise ValueError("Stemmer and/or `rougeLsum` requires that nltk is installed. Use `pip install nltk`.") + import nltk + + if not isinstance(rouge_keys, tuple): + rouge_keys = tuple([rouge_keys]) + for key in rouge_keys: + if key not in ALLOWED_ROUGE_KEYS: + raise ValueError(f"Got unknown rouge key {key}. Expected to be one of {ALLOWED_ROUGE_KEYS}") + + self.rouge_keys = rouge_keys + self.rouge_keys_values = [ALLOWED_ROUGE_KEYS[key] for key in rouge_keys] + self.stemmer = nltk.stem.porter.PorterStemmer() if use_stemmer else None + + # Adding stated dynamically to prevent IndexError during sync function as some lists can be empty. + for rouge_key in self.rouge_keys: + for score in ["fmeasure", "precision", "recall"]: + self.add_state(f"{rouge_key}_{score}", [], dist_reduce_fx=None) + + def update(self, preds: Union[str, List[str]], targets: Union[str, List[str]]) -> None: # type: ignore + """Compute rouge scores. + + Args: + preds: An iterable of predicted sentences. + targets: An iterable of target sentences. + """ + + if isinstance(preds, str): + preds = [preds] + + if isinstance(targets, str): + targets = [targets] + + output: Dict[Union[int, str], List[Dict[str, Tensor]]] = _rouge_score_update( + preds, targets, self.rouge_keys_values, stemmer=self.stemmer + ) + for rouge_key, metrics in output.items(): + for metric in metrics: + for type, value in metric.items(): + getattr(self, f"rouge{rouge_key}_{type}").append(value.to(self.device)) + + def compute(self) -> Dict[str, Tensor]: + """Calculate (Aggregate and provide confidence intervals) ROUGE score. + + Return: + Python dictionary of rouge scores for each input rouge key. + """ + update_output = {} + for rouge_key in self.rouge_keys_values: + for type in ["fmeasure", "precision", "recall"]: + update_output[f"rouge{rouge_key}_{type}"] = getattr(self, f"rouge{rouge_key}_{type}") + + return _rouge_score_compute(update_output) + + def __hash__(self) -> int: + # override to hash list objects. + # this is a bug in the upstream pytorch release. 
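+        # The per-key states registered in __init__ are Python lists, which are unhashable,
+        # so the default Metric.__hash__ would fail here; any list-valued state is therefore
+        # converted to a tuple before the combined hash is taken.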
+ hash_vals = [self.__class__.__name__] + + for key in self._defaults: + value = getattr(self, key) + if isinstance(value, list): + value = tuple(value) + hash_vals.append(value) + + return hash(tuple(hash_vals)) diff --git a/RE/paddlemetric/src/paddlemetrics/text/sacre_bleu.py b/RE/paddlemetric/src/paddlemetrics/text/sacre_bleu.py new file mode 100644 index 00000000..4f4d99e8 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/text/sacre_bleu.py @@ -0,0 +1,134 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# referenced from +# Library Name: torchtext +# Authors: torchtext authors and @sluks +# Date: 2020-07-18 +# Link: https://pyB.org/text/_modules/torchtext/data/metrics.html#bleu_score +from typing import Any, Callable, Optional, Sequence + +from typing_extensions import Literal + +from paddlemetrics.functional.text.bleu import _bleu_score_update +from paddlemetrics.functional.text.sacre_bleu import _SacreBLEUTokenizer +from paddlemetrics.text.bleu import BLEUScore +from paddlemetrics.utilities.imports import _REGEX_AVAILABLE + +AVAILABLE_TOKENIZERS = ("none", "13a", "zh", "intl", "char") + + +class SacreBLEUScore(BLEUScore): + """Calculate `BLEU score`_ [1] of machine translated text with one or more references. This implementation + follows the behaviour of SacreBLEU [2] implementation from https://github.com/mjpost/sacrebleu. + + The SacreBLEU implementation differs from the NLTK BLEU implementation in tokenization techniques. + + Args: + n_gram: + Gram value ranged from 1 to 4 (Default 4) + smooth: + Whether or not to apply smoothing – see [2] + tokenize: + Tokenization technique to be used. (Default '13a') + Supported tokenization: ['none', '13a', 'zh', 'intl', 'char'] + lowercase: + If ``True``, BLEU score over lowercased text is calculated. + compute_on_step: + Forward only calls ``update()`` and returns None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When `None`, DDP + will be used to perform the allgather. + + Raises: + ValueError: + If ``tokenize`` not one of 'none', '13a', 'zh', 'intl' or 'char' + ValueError: + If ``tokenize`` is set to 'intl' and `regex` is not installed + + + Example: + >>> translate_corpus = ['the cat is on the mat'] + >>> reference_corpus = [['there is a cat on the mat', 'a cat is on the mat']] + >>> metric = SacreBLEUScore() + >>> metric(reference_corpus, translate_corpus) + tensor(0.7598) + + References: + [1] BLEU: a Method for Automatic Evaluation of Machine Translation by Papineni, + Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu `BLEU`_ + + [2] A Call for Clarity in Reporting BLEU Scores by Matt Post. 
+ + [3] Automatic Evaluation of Machine Translation Quality Using Longest Common Subsequence + and Skip-Bigram Statistics by Chin-Yew Lin and Franz Josef Och `Machine Translation Evolution`_ + """ + + def __init__( + self, + n_gram: int = 4, + smooth: bool = False, + tokenize: Literal["none", "13a", "zh", "intl", "char"] = "13a", + lowercase: bool = False, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Optional[Callable] = None, + ): + super().__init__( + n_gram=n_gram, + smooth=smooth, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + if tokenize not in AVAILABLE_TOKENIZERS: + raise ValueError(f"Argument `tokenize` expected to be one of {AVAILABLE_TOKENIZERS} but got {tokenize}.") + + if tokenize == "intl" and not _REGEX_AVAILABLE: + raise ValueError( + "`'intl'` tokenization requires `regex` installed. Use `pip install regex` or `pip install " + "paddlemetrics[text]`." + ) + self.tokenizer = _SacreBLEUTokenizer(tokenize, lowercase) + + def update( # type: ignore + self, reference_corpus: Sequence[Sequence[str]], translate_corpus: Sequence[str] + ) -> None: + """Compute Precision Scores. + + Args: + reference_corpus: An iterable of iterables of reference corpus + translate_corpus: An iterable of machine translated corpus + """ + reference_corpus_: Sequence[Sequence[Sequence[str]]] = [ + [self.tokenizer(line) for line in reference] for reference in reference_corpus + ] + translate_corpus_: Sequence[Sequence[str]] = [self.tokenizer(line) for line in translate_corpus] + + self.trans_len, self.ref_len = _bleu_score_update( + reference_corpus_, + translate_corpus_, + self.numerator, + self.denominator, + self.trans_len, + self.ref_len, + self.n_gram, + ) diff --git a/RE/paddlemetric/src/paddlemetrics/text/wer.py b/RE/paddlemetric/src/paddlemetrics/text/wer.py new file mode 100644 index 00000000..7bb69740 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/text/wer.py @@ -0,0 +1,109 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, Callable, List, Optional, Union +from warnings import warn + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.functional.text.wer import _wer_compute, _wer_update +from paddlemetrics.metric import Metric + + +class WER(Metric): + r""" + Word error rate (WER_) is a common metric of the performance of an automatic speech recognition system. + This value indicates the percentage of words that were incorrectly predicted. + The lower the value, the better the performance of the ASR system with a WER of 0 being a perfect score. + Word error rate can then be computed as: + + .. 
math:: + WER = \frac{S + D + I}{N} = \frac{S + D + I}{S + D + C} + + where: + - S is the number of substitutions, + - D is the number of deletions, + - I is the number of insertions, + - C is the number of correct words, + - N is the number of words in the reference (N=S+D+C). + + Compute WER score of transcribed segments against references. + + Args: + concatenate_texts: Whether to concatenate all input texts or compute WER iteratively. + This argument is deprecated in v0.6 and it will be removed in v0.7. + compute_on_step: + Forward only calls ``update()`` and return None if this is set to False. default: True + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step. default: False + process_group: + Specify the process group on which synchronization is called. default: None (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather + + Returns: + (Tensor) Word error rate + + Examples: + >>> predictions = ["this is the prediction", "there is an other sample"] + >>> references = ["this is the reference", "there is another one"] + >>> metric = WER() + >>> metric(predictions, references) + tensor(0.5000) + """ + is_differentiable = False + higher_is_better = False + error: Tensor + total: Tensor + + def __init__( + self, + concatenate_texts: Optional[bool] = None, # TODO: remove in v0.7 + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ): + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + if concatenate_texts is not None: + warn("`concatenate_texts` has been deprecated in v0.6 and it will be removed in v0.7", DeprecationWarning) + self.add_state("errors", tensor(0, dtype=B.float), dist_reduce_fx="sum") + self.add_state("total", tensor(0, dtype=B.float), dist_reduce_fx="sum") + + def update(self, predictions: Union[str, List[str]], references: Union[str, List[str]]) -> None: # type: ignore + """Store references/predictions for computing Word Error Rate scores. + + Args: + predictions: Transcription(s) to score as a string or list of strings + references: Reference(s) for each speech input as a string or list of strings + """ + errors, total = _wer_update(predictions, references) + self.errors += errors + self.total += total + + def compute(self) -> Tensor: + """Calculate the word error rate. + + Returns: + (Tensor) Word error rate + """ + return _wer_compute(self.errors, self.total) diff --git a/RE/paddlemetric/src/paddlemetrics/utilities/__init__.py b/RE/paddlemetric/src/paddlemetrics/utilities/__init__.py new file mode 100644 index 00000000..b1b2a506 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/utilities/__init__.py @@ -0,0 +1,3 @@ +from paddlemetrics.utilities.data import apply_to_collection # noqa: F401 +from paddlemetrics.utilities.distributed import class_reduce, reduce # noqa: F401 +from paddlemetrics.utilities.prints import rank_zero_debug, rank_zero_info, rank_zero_warn # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/utilities/checks.py b/RE/paddlemetric/src/paddlemetrics/utilities/checks.py new file mode 100644 index 00000000..b948f103 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/utilities/checks.py @@ -0,0 +1,582 @@ +# Copyright The PyTorch Lightning team. 
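As a quick, illustrative sanity check of the WER doctest above (not part of this diff): the first pair contributes one substitution over a four-word reference, and the second pair ("there is an other sample" vs. "there is another one") contributes three word-level edits over a four-word reference:

```python
# Illustrative check of the WER doctest values: WER = (S + D + I) / N over all references.
errors, total = 1 + 3, 4 + 4
print(errors / total)  # 0.5, matching tensor(0.5000)
```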
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional, Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.utilities.data import select_topk, to_onehot +from paddlemetrics.utilities.enums import DataType + + +def _check_same_shape(preds: Tensor, target: Tensor) -> None: + """Check that predictions and target have the same shape, else raise error.""" + if preds.shape != target.shape: + raise RuntimeError("Predictions and targets are expected to have the same shape") + + +def _basic_input_validation(preds: Tensor, target: Tensor, threshold: float, multiclass: Optional[bool]) -> None: + """Perform basic validation of inputs that does not require deducing any information of the type of inputs.""" + + if target.is_floating_point(): + raise ValueError("The `target` has to be an integer tensor.") + if target.min() < 0: + raise ValueError("The `target` has to be a non-negative tensor.") + + preds_float = preds.is_floating_point() + if not preds_float and preds.min() < 0: + raise ValueError("If `preds` are integers, they have to be non-negative.") + + if not preds.shape[0] == target.shape[0]: + raise ValueError("The `preds` and `target` should have the same first dimension.") + + if multiclass is False and target.max() > 1: + raise ValueError("If you set `multiclass=False`, then `target` should not exceed 1.") + + if multiclass is False and not preds_float and preds.max() > 1: + raise ValueError("If you set `multiclass=False` and `preds` are integers, then `preds` should not exceed 1.") + + +def _check_shape_and_type_consistency(preds: Tensor, target: Tensor) -> Tuple[DataType, int]: + """This checks that the shape and type of inputs are consistent with each other and fall into one of the + allowed input types (see the documentation of docstring of ``_input_format_classification``). It does not check + for consistency of number of classes, other functions take care of that. + + It returns the name of the case in which the inputs fall, and the implied number of classes (from the ``C`` dim for + multi-class data, or extra dim(s) for multi-label data). + """ + + preds_float = preds.is_floating_point() + + if preds.ndim == target.ndim: + if preds.shape != target.shape: + raise ValueError( + "The `preds` and `target` should have the same shape,", + f" got `preds` with shape={preds.shape} and `target` with shape={target.shape}.", + ) + if preds_float and target.max() > 1: + raise ValueError( + "If `preds` and `target` are of shape (N, ...) and `preds` are floats, `target` should be binary." 
+ ) + + # Get the case + if preds.ndim == 1 and preds_float: + case = DataType.BINARY + elif preds.ndim == 1 and not preds_float: + case = DataType.MULTICLASS + elif preds.ndim > 1 and preds_float: + case = DataType.MULTILABEL + else: + case = DataType.MULTIDIM_MULTICLASS + + implied_classes = preds[0].numel() + + elif preds.ndim == target.ndim + 1: + if not preds_float: + raise ValueError("If `preds` have one dimension more than `target`, `preds` should be a float tensor.") + if preds.shape[2:] != target.shape[1:]: + raise ValueError( + "If `preds` have one dimension more than `target`, the shape of `preds` should be" + " (N, C, ...), and the shape of `target` should be (N, ...)." + ) + + implied_classes = preds.shape[1] + + if preds.ndim == 2: + case = DataType.MULTICLASS + else: + case = DataType.MULTIDIM_MULTICLASS + else: + raise ValueError( + "Either `preds` and `target` both should have the (same) shape (N, ...), or `target` should be (N, ...)" + " and `preds` should be (N, C, ...)." + ) + + return case, implied_classes + + +def _check_num_classes_binary(num_classes: int, multiclass: Optional[bool]) -> None: + """This checks that the consistency of `num_classes` with the data and `multiclass` param for binary data.""" + + if num_classes > 2: + raise ValueError("Your data is binary, but `num_classes` is larger than 2.") + if num_classes == 2 and not multiclass: + raise ValueError( + "Your data is binary and `num_classes=2`, but `multiclass` is not True." + " Set it to True if you want to transform binary data to multi-class format." + ) + if num_classes == 1 and multiclass: + raise ValueError( + "You have binary data and have set `multiclass=True`, but `num_classes` is 1." + " Either set `multiclass=None`(default) or set `num_classes=2`" + " to transform binary data to multi-class format." + ) + + +def _check_num_classes_mc( + preds: Tensor, + target: Tensor, + num_classes: int, + multiclass: Optional[bool], + implied_classes: int, +) -> None: + """This checks that the consistency of `num_classes` with the data and `multiclass` param for (multi- + dimensional) multi-class data.""" + + if num_classes == 1 and multiclass is not False: + raise ValueError( + "You have set `num_classes=1`, but predictions are integers." + " If you want to convert (multi-dimensional) multi-class data with 2 classes" + " to binary/multi-label, set `multiclass=False`." + ) + if num_classes > 1: + if multiclass is False and implied_classes != num_classes: + raise ValueError( + "You have set `multiclass=False`, but the implied number of classes " + " (from shape of inputs) does not match `num_classes`. If you are trying to" + " transform multi-dim multi-class data with 2 classes to multi-label, `num_classes`" + " should be either None or the product of the size of extra dimensions (...)." + " See Input Types in Metrics documentation." + ) + if num_classes <= target.max(): + raise ValueError("The highest label in `target` should be smaller than `num_classes`.") + if preds.shape != target.shape and num_classes != implied_classes: + raise ValueError("The size of C dimension of `preds` does not match `num_classes`.") + + +def _check_num_classes_ml(num_classes: int, multiclass: Optional[bool], implied_classes: int) -> None: + """This checks that the consistency of `num_classes` with the data and `multiclass` param for multi-label + data.""" + + if multiclass and num_classes != 2: + raise ValueError( + "Your have set `multiclass=True`, but `num_classes` is not equal to 2." 
+ " If you are trying to transform multi-label data to 2 class multi-dimensional" + " multi-class, you should set `num_classes` to either 2 or None." + ) + if not multiclass and num_classes != implied_classes: + raise ValueError("The implied number of classes (from shape of inputs) does not match num_classes.") + + +def _check_top_k(top_k: int, case: str, implied_classes: int, multiclass: Optional[bool], preds_float: bool) -> None: + if case == DataType.BINARY: + raise ValueError("You can not use `top_k` parameter with binary data.") + if not isinstance(top_k, int) or top_k <= 0: + raise ValueError("The `top_k` has to be an integer larger than 0.") + if not preds_float: + raise ValueError("You have set `top_k`, but you do not have probability predictions.") + if multiclass is False: + raise ValueError("If you set `multiclass=False`, you can not set `top_k`.") + if case == DataType.MULTILABEL and multiclass: + raise ValueError( + "If you want to transform multi-label data to 2 class multi-dimensional" + "multi-class data using `multiclass=True`, you can not use `top_k`." + ) + if top_k >= implied_classes: + raise ValueError("The `top_k` has to be strictly smaller than the `C` dimension of `preds`.") + + +def _check_classification_inputs( + preds: Tensor, + target: Tensor, + threshold: float, + num_classes: Optional[int], + multiclass: Optional[bool], + top_k: Optional[int], +) -> DataType: + """Performs error checking on inputs for classification. + + This ensures that preds and target take one of the shape/type combinations that are + specified in ``_input_format_classification`` docstring. It also checks the cases of + over-rides with ``multiclass`` by checking (for multi-class and multi-dim multi-class + cases) that there are only up to 2 distinct labels. + + In case where preds are floats (probabilities), it is checked whether they are in [0,1] interval. + + When ``num_classes`` is given, it is checked that it is consistent with input cases (binary, + multi-label, ...), and that, if available, the implied number of classes in the ``C`` + dimension is consistent with it (as well as that max label in target is smaller than it). + + When ``num_classes`` is not specified in these cases, consistency of the highest target + value against ``C`` dimension is checked for (multi-dimensional) multi-class cases. + + If ``top_k`` is set (not None) for inputs that do not have probability predictions (and + are not binary), an error is raised. Similarly if ``top_k`` is set to a number that + is higher than or equal to the ``C`` dimension of ``preds``, an error is raised. + + Preds and target tensors are expected to be squeezed already - all dimensions should be + greater than 1, except perhaps the first one (``N``). + + Args: + preds: Tensor with predictions (labels or probabilities) + target: Tensor with ground truth labels, always integers (labels) + threshold: + Threshold value for transforming probability/logit predictions to binary + (0,1) predictions, in the case of binary or multi-label inputs. + num_classes: + Number of classes. If not explicitly set, the number of classes will be inferred + either from the shape of inputs, or the maximum label in the ``target`` and ``preds`` + tensor, where applicable. + top_k: + Number of highest probability entries for each sample to convert to 1s - relevant + only for inputs with probability predictions. The default value (``None``) will be + interpreted as 1 for these inputs. 
If this parameter is set for multi-label inputs, + it will take precedence over threshold. + + Should be left unset (``None``) for inputs with label predictions. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. + + + Return: + case: The case the inputs fall in, one of 'binary', 'multi-class', 'multi-label' or + 'multi-dim multi-class' + """ + + # Basic validation (that does not need case/type information) + _basic_input_validation(preds, target, threshold, multiclass) + + # Check that shape/types fall into one of the cases + case, implied_classes = _check_shape_and_type_consistency(preds, target) + + # Check consistency with the `C` dimension in case of multi-class data + if preds.shape != target.shape: + if multiclass is False and implied_classes != 2: + raise ValueError( + "You have set `multiclass=False`, but have more than 2 classes in your data," + " based on the C dimension of `preds`." + ) + if target.max() >= implied_classes: + raise ValueError( + "The highest label in `target` should be smaller than the size of the `C` dimension of `preds`." + ) + + # Check that num_classes is consistent + if num_classes: + if case == DataType.BINARY: + _check_num_classes_binary(num_classes, multiclass) + elif case in (DataType.MULTICLASS, DataType.MULTIDIM_MULTICLASS): + _check_num_classes_mc(preds, target, num_classes, multiclass, implied_classes) + elif case.MULTILABEL: + _check_num_classes_ml(num_classes, multiclass, implied_classes) + + # Check that top_k is consistent + if top_k is not None: + _check_top_k(top_k, case, implied_classes, multiclass, preds.is_floating_point()) + + return case + + +def _input_squeeze( + preds: Tensor, + target: Tensor, +) -> Tuple[Tensor, Tensor]: + """Remove excess dimensions.""" + if preds.shape[0] == 1: + preds, target = preds.squeeze().unsqueeze(0), target.squeeze().unsqueeze(0) + else: + preds, target = preds.squeeze(), target.squeeze() + return preds, target + + +def _input_format_classification( + preds: Tensor, + target: Tensor, + threshold: float = 0.5, + top_k: Optional[int] = None, + num_classes: Optional[int] = None, + multiclass: Optional[bool] = None, +) -> Tuple[Tensor, Tensor, DataType]: + """Convert preds and target tensors into common format. + + Preds and targets are supposed to fall into one of these categories (and are + validated to make sure this is the case): + + * Both preds and target are of shape ``(N,)``, and both are integers (multi-class) + * Both preds and target are of shape ``(N,)``, and target is binary, while preds + are a float (binary) + * preds are of shape ``(N, C)`` and are floats, and target is of shape ``(N,)`` and + is integer (multi-class) + * preds and target are of shape ``(N, ...)``, target is binary and preds is a float + (multi-label) + * preds are of shape ``(N, C, ...)`` and are floats, target is of shape ``(N, ...)`` + and is integer (multi-dimensional multi-class) + * preds and target are of shape ``(N, ...)`` both are integers (multi-dimensional + multi-class) + + To avoid ambiguities, all dimensions of size 1, except the first one, are squeezed out. + + The returned output tensors will be binary tensors of the same shape, either ``(N, C)`` + of ``(N, C, X)``, the details for each case are described below. 
The function also returns + a ``case`` string, which describes which of the above cases the inputs belonged to - regardless + of whether this was "overridden" by other settings (like ``multiclass``). + + In binary case, targets are normally returned as ``(N,1)`` tensor, while preds are transformed + into a binary tensor (elements become 1 if the probability is greater than or equal to + ``threshold`` or 0 otherwise). If ``multiclass=True``, then then both targets are preds + become ``(N, 2)`` tensors by a one-hot transformation; with the thresholding being applied to + preds first. + + In multi-class case, normally both preds and targets become ``(N, C)`` binary tensors; targets + by a one-hot transformation and preds by selecting ``top_k`` largest entries (if their original + shape was ``(N,C)``). However, if ``multiclass=False``, then targets and preds will be + returned as ``(N,1)`` tensor. + + In multi-label case, normally targets and preds are returned as ``(N, C)`` binary tensors, with + preds being binarized as in the binary case. Here the ``C`` dimension is obtained by flattening + all dimensions after the first one. However if ``multiclass=True``, then both are returned as + ``(N, 2, C)``, by an equivalent transformation as in the binary case. + + In multi-dimensional multi-class case, normally both target and preds are returned as + ``(N, C, X)`` tensors, with ``X`` resulting from flattening of all dimensions except ``N`` and + ``C``. The transformations performed here are equivalent to the multi-class case. However, if + ``multiclass=False`` (and there are up to two classes), then the data is returned as + ``(N, X)`` binary tensors (multi-label). + + Note: + Where a one-hot transformation needs to be performed and the number of classes + is not implicitly given by a ``C`` dimension, the new ``C`` dimension will either be + equal to ``num_classes``, if it is given, or the maximum label value in preds and + target. + + Args: + preds: Tensor with predictions (labels or probabilities) + target: Tensor with ground truth labels, always integers (labels) + threshold: + Threshold value for transforming probability/logit predictions to binary + (0 or 1) predictions, in the case of binary or multi-label inputs. + num_classes: + Number of classes. If not explicitly set, the number of classes will be inferred + either from the shape of inputs, or the maximum label in the ``target`` and ``preds`` + tensor, where applicable. + top_k: + Number of highest probability entries for each sample to convert to 1s - relevant + only for (multi-dimensional) multi-class inputs with probability predictions. The + default value (``None``) will be interepreted as 1 for these inputs. + + Should be left unset (``None``) for all other types of inputs. + multiclass: + Used only in certain special cases, where you want to treat inputs as a different type + than what they appear to be. See the parameter's + :ref:`documentation section ` + for a more detailed explanation and examples. 
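+
+    Example:
+        An illustrative sketch of the conversion for the plain multi-class case
+        (``B`` is the ``paddleext.torchapi`` backend imported by this module)::
+
+            preds = B.tensor([[0.1, 0.9], [0.8, 0.2]])  # (N=2, C=2) probabilities
+            target = B.tensor([1, 0])                   # (N=2,) integer labels
+            preds, target, case = _input_format_classification(preds, target)
+            # preds and target are now both (2, 2) binary int tensors,
+            # and case is DataType.MULTICLASS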
+ + Returns: + preds: binary tensor of shape ``(N, C)`` or ``(N, C, X)`` + target: binary tensor of shape ``(N, C)`` or ``(N, C, X)`` + case: The case the inputs fall in, one of ``'binary'``, ``'multi-class'``, ``'multi-label'`` or + ``'multi-dim multi-class'`` + """ + # Remove excess dimensions + preds, target = _input_squeeze(preds, target) + + # Convert half precision tensors to full precision, as not all ops are supported + # for example, min() is not supported + if preds.dtype == B.float16: + preds = preds.float() + + case = _check_classification_inputs( + preds, + target, + threshold=threshold, + num_classes=num_classes, + multiclass=multiclass, + top_k=top_k, + ) + + if case in (DataType.BINARY, DataType.MULTILABEL) and not top_k: + preds = (preds >= threshold).int() + num_classes = num_classes if not multiclass else 2 + + if case == DataType.MULTILABEL and top_k: + preds = select_topk(preds, top_k) + + if case in (DataType.MULTICLASS, DataType.MULTIDIM_MULTICLASS) or multiclass: + if preds.is_floating_point(): + num_classes = preds.shape[1] + preds = select_topk(preds, top_k or 1) + else: + num_classes = num_classes if num_classes else max(preds.max(), target.max()) + 1 + preds = to_onehot(preds, max(2, num_classes)) + + target = to_onehot(target, max(2, num_classes)) # type: ignore + + if multiclass is False: + preds, target = preds[:, 1, ...], target[:, 1, ...] + + if (case in (DataType.MULTICLASS, DataType.MULTIDIM_MULTICLASS) and multiclass is not False) or multiclass: + target = target.reshape(target.shape[0], target.shape[1], -1) + preds = preds.reshape(preds.shape[0], preds.shape[1], -1) + else: + target = target.reshape(target.shape[0], -1) + preds = preds.reshape(preds.shape[0], -1) + + # Some operations above create an extra dimension for MC/binary case - this removes it + if preds.ndim > 2: + preds, target = preds.squeeze(-1), target.squeeze(-1) + + return preds.int(), target.int(), case + + +def _input_format_classification_one_hot( + num_classes: int, + preds: Tensor, + target: Tensor, + threshold: float = 0.5, + multilabel: bool = False, +) -> Tuple[Tensor, Tensor]: + """Convert preds and target tensors into one hot spare label tensors. + + Args: + num_classes: number of classes + preds: either tensor with labels, tensor with probabilities/logits or multilabel tensor + target: tensor with ground true labels + threshold: float used for thresholding multilabel input + multilabel: boolean flag indicating if input is multilabel + + Raises: + ValueError: + If ``preds`` and ``target`` don't have the same number of dimensions + or one additional dimension for ``preds``. 
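+
+    Example:
+        An illustrative sketch for plain multi-class labels (``B`` is the backend
+        imported by this module)::
+
+            preds = B.tensor([0, 1, 2])
+            target = B.tensor([0, 2, 1])
+            preds, target = _input_format_classification_one_hot(
+                num_classes=3, preds=preds, target=target
+            )
+            # both are returned as one-hot tensors reshaped to [num_classes, -1],
+            # here of shape (3, 3)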
+ + Returns: + preds: one hot tensor of shape [num_classes, -1] with predicted labels + target: one hot tensors of shape [num_classes, -1] with true labels + """ + if preds.ndim not in (target.ndim, target.ndim + 1): + raise ValueError("preds and target must have same number of dimensions, or one additional dimension for preds") + + if preds.ndim == target.ndim + 1: + # multi class probabilities + preds = B.argmax(preds, dim=1) + + if preds.ndim == target.ndim and preds.dtype in (B.long, B.int) and num_classes > 1 and not multilabel: + # multi-class + preds = to_onehot(preds, num_classes=num_classes) + target = to_onehot(target, num_classes=num_classes) + + elif preds.ndim == target.ndim and preds.is_floating_point(): + # binary or multilabel probabilities + preds = (preds >= threshold).long() + + # transpose class as first dim and reshape + if preds.ndim > 1: + preds = preds.transpose(1, 0) + target = target.transpose(1, 0) + + return preds.reshape(num_classes, -1), target.reshape(num_classes, -1) + + +def _check_retrieval_functional_inputs( + preds: Tensor, + target: Tensor, + allow_non_binary_target: bool = False, +) -> Tuple[Tensor, Tensor]: + """Check ``preds`` and ``target`` tensors are of the same shape and of the correct dtype. + + Args: + preds: either tensor with scores/logits + target: tensor with ground true labels + allow_non_binary_target: whether to allow target to contain non-binary values + + Raises: + ValueError: + If ``preds`` and ``target`` don't have the same shape, if they are empty + or not of the correct ``dtypes``. + + Returns: + preds: as B.float32 + target: as B.long if not floating point else B.float32 + """ + if preds.shape != target.shape: + raise ValueError("`preds` and `target` must be of the same shape") + + if not preds.numel() or not preds.size(): + raise ValueError("`preds` and `target` must be non-empty and non-scalar tensors") + + return _check_retrieval_target_and_prediction_types(preds, target, allow_non_binary_target=allow_non_binary_target) + + +def _check_retrieval_inputs( + indexes: Tensor, + preds: Tensor, + target: Tensor, + allow_non_binary_target: bool = False, +) -> Tuple[Tensor, Tensor, Tensor]: + """Check ``indexes``, ``preds`` and ``target`` tensors are of the same shape and of the correct dtype. + + Args: + indexes: tensor with queries indexes + preds: tensor with scores/logits + target: tensor with ground true labels + + Raises: + ValueError: + If ``preds`` and ``target`` don't have the same shape, if they are empty + or not of the correct ``dtypes``. + + Returns: + indexes: as B.long + preds: as B.float32 + target: as B.long + """ + if indexes.shape != preds.shape or preds.shape != target.shape: + raise ValueError("`indexes`, `preds` and `target` must be of the same shape") + + if not indexes.numel() or not indexes.size(): + raise ValueError( + "`indexes`, `preds` and `target` must be non-empty and non-scalar tensors", + ) + + if indexes.dtype is not B.long: + raise ValueError("`indexes` must be a tensor of long integers") + + preds, target = _check_retrieval_target_and_prediction_types( + preds, target, allow_non_binary_target=allow_non_binary_target + ) + + return indexes.long().flatten(), preds, target + + +def _check_retrieval_target_and_prediction_types( + preds: Tensor, + target: Tensor, + allow_non_binary_target: bool = False, +) -> Tuple[Tensor, Tensor]: + """Check ``preds`` and ``target`` tensors are of the same shape and of the correct dtype. 
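+
+    For example (illustrative), an integer ``target`` containing a value larger than 1,
+    such as ``B.tensor([0, 2])``, is rejected unless ``allow_non_binary_target=True``,
+    and ``preds`` must always be a floating-point tensor.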
+ + Args: + preds: either tensor with scores/logits + target: tensor with ground true labels + allow_non_binary_target: whether to allow target to contain non-binary values + + Raises: + ValueError: + If ``preds`` and ``target`` don't have the same shape, if they are empty + or not of the correct ``dtypes``. + """ + if target.dtype not in (B.bool, B.long, B.int) and not B.is_floating_point(target): + raise ValueError("`target` must be a tensor of booleans, integers or floats") + + if not preds.is_floating_point(): + raise ValueError("`preds` must be a tensor of floats") + + if not allow_non_binary_target and (target.max() > 1 or target.min() < 0): + raise ValueError("`target` must contain `binary` values") + + target = target.float().flatten() if target.is_floating_point() else target.long().flatten() + return preds.float().flatten(), target diff --git a/RE/paddlemetric/src/paddlemetrics/utilities/data.py b/RE/paddlemetric/src/paddlemetrics/utilities/data.py new file mode 100644 index 00000000..13e43fb6 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/utilities/data.py @@ -0,0 +1,240 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Callable, List, Mapping, Optional, Sequence, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from paddlemetrics.utilities.prints import rank_zero_warn + +METRIC_EPS = 1e-6 + + +def dim_zero_cat(x: Union[Tensor, List[Tensor]]) -> Tensor: + """concatenation along the zero dimension.""" + x = x if isinstance(x, (list, tuple)) else [x] + x = [y.unsqueeze(0) if y.numel() == 1 and y.ndim == 0 else y for y in x] + if not x: # empty list + raise ValueError("No samples to concatenate") + return B.cat(x, dim=0) + + +def dim_zero_sum(x: Tensor) -> Tensor: + """summation along the zero dimension.""" + return B.sum(x, dim=0) + + +def dim_zero_mean(x: Tensor) -> Tensor: + """average along the zero dimension.""" + return B.mean(x, dim=0) + + +def dim_zero_max(x: Tensor) -> Tensor: + """max along the zero dimension.""" + return B.max(x, dim=0).values + + +def dim_zero_min(x: Tensor) -> Tensor: + """min along the zero dimension.""" + return B.min(x, dim=0).values + + +def _flatten(x: Sequence) -> list: + return [item for sublist in x for item in sublist] + + +def to_onehot( + label_tensor: Tensor, + num_classes: Optional[int] = None, +) -> Tensor: + """Converts a dense label tensor to one-hot format. + + Args: + label_tensor: dense label tensor, with shape [N, d1, d2, ...] + num_classes: number of classes C + + Returns: + A sparse label tensor with shape [N, C, d1, d2, ...] 
+ + Example: + >>> x = B.tensor([1, 2, 3]) + >>> to_onehot(x) + tensor([[0, 1, 0, 0], + [0, 0, 1, 0], + [0, 0, 0, 1]]) + """ + if num_classes is None: + num_classes = int(label_tensor.max().detach().item() + 1) + + tensor_onehot = B.zeros( + label_tensor.shape[0], + num_classes, + *label_tensor.shape[1:], + dtype=label_tensor.dtype, + device=label_tensor.device, + ) + index = label_tensor.long().unsqueeze(1).expand_as(tensor_onehot) + return (tensor_onehot.scatter_(1, index, 1.0) > 0).to(label_tensor.dtype) + + +def select_topk(prob_tensor: Tensor, topk: int = 1, dim: int = 1) -> Tensor: + """Convert a probability tensor to binary by selecting top-k highest entries. + + Args: + prob_tensor: dense tensor of shape ``[..., C, ...]``, where ``C`` is in the + position defined by the ``dim`` argument + topk: number of highest entries to turn into 1s + dim: dimension on which to compare entries + + Returns: + A binary tensor of the same shape as the input tensor of type B.int32 + + Example: + >>> x = B.tensor([[1.1, 2.0, 3.0], [2.0, 1.0, 0.5]]) + >>> select_topk(x, topk=2) + tensor([[0, 1, 1], + [1, 1, 0]], dtype=B.int32) + """ + zeros = B.zeros_like(prob_tensor) + if topk == 1: # argmax has better performance than topk + topk_tensor = zeros.scatter(dim, prob_tensor.argmax(dim=dim, keepdim=True), 1.0) + else: + topk_tensor = zeros.scatter(dim, prob_tensor.topk(k=topk, dim=dim).indices, 1.0) + return topk_tensor.int() + + +def to_categorical(x: Tensor, argmax_dim: int = 1) -> Tensor: + """Converts a tensor of probabilities to a dense label tensor. + + Args: + x: probabilities to get the categorical label [N, d1, d2, ...] + argmax_dim: dimension to apply + + Return: + A tensor with categorical labels [N, d2, ...] + + Example: + >>> x = B.tensor([[0.2, 0.5], [0.9, 0.1]]) + >>> to_categorical(x) + tensor([1, 0]) + """ + return B.argmax(x, dim=argmax_dim) + + +def get_num_classes( + preds: Tensor, + target: Tensor, + num_classes: Optional[int] = None, +) -> int: + """Calculates the number of classes for a given prediction and target tensor. + + Args: + preds: predicted values + target: true labels + num_classes: number of classes if known + + Return: + An integer that represents the number of classes. + """ + num_target_classes = int(target.max().detach().item() + 1) + num_pred_classes = int(preds.max().detach().item() + 1) + num_all_classes = max(num_target_classes, num_pred_classes) + + if num_classes is None: + num_classes = num_all_classes + elif num_classes != num_all_classes: + rank_zero_warn( + f"You have set {num_classes} number of classes which is" + f" different from predicted ({num_pred_classes}) and" + f" target ({num_target_classes}) number of classes", + RuntimeWarning, + ) + return num_classes + + +def apply_to_collection( + data: Any, + dtype: Union[type, tuple], + function: Callable, + *args: Any, + wrong_dtype: Optional[Union[type, tuple]] = None, + **kwargs: Any, +) -> Any: + """Recursively applies a function to all elements of a certain dtype. 
+ + Args: + data: the collection to apply the function to + dtype: the given function will be applied to all elements of this dtype + function: the function to apply + *args: positional arguments (will be forwarded to calls of ``function``) + wrong_dtype: the given function won't be applied if this type is specified and the given collections is of + the :attr:`wrong_type` even if it is of type :attr`dtype` + **kwargs: keyword arguments (will be forwarded to calls of ``function``) + + Returns: + the resulting collection + + Example: + >>> apply_to_collection(B.tensor([8, 0, 2, 6, 7]), dtype=Tensor, function=lambda x: x ** 2) + tensor([64, 0, 4, 36, 49]) + >>> apply_to_collection([8, 0, 2, 6, 7], dtype=int, function=lambda x: x ** 2) + [64, 0, 4, 36, 49] + >>> apply_to_collection(dict(abc=123), dtype=int, function=lambda x: x ** 2) + {'abc': 15129} + """ + elem_type = type(data) + + # Breaking condition + if isinstance(data, dtype) and (wrong_dtype is None or not isinstance(data, wrong_dtype)): + return function(data, *args, **kwargs) + + # Recursively apply to collection items + if isinstance(data, Mapping): + return elem_type({k: apply_to_collection(v, dtype, function, *args, **kwargs) for k, v in data.items()}) + + if isinstance(data, tuple) and hasattr(data, "_fields"): # named tuple + return elem_type(*(apply_to_collection(d, dtype, function, *args, **kwargs) for d in data)) + + if isinstance(data, Sequence) and not isinstance(data, str): + return elem_type([apply_to_collection(d, dtype, function, *args, **kwargs) for d in data]) + + # data is neither of dtype, nor a collection + return data + + +def get_group_indexes(indexes: Tensor) -> List[Tensor]: + """Given an integer `B.Tensor` `indexes`, return a `B.Tensor` of indexes for each different value in + `indexes`. + + Args: + indexes: a `B.Tensor` + + Return: + A list of integer `B.Tensor`s + + Example: + >>> indexes = B.tensor([0, 0, 0, 1, 1, 1, 1]) + >>> get_group_indexes(indexes) + [tensor([0, 1, 2]), tensor([3, 4, 5, 6])] + """ + + res: dict = {} + for i, _id in enumerate(indexes): + _id = _id.item() + if _id in res: + res[_id] += [i] + else: + res[_id] = [i] + + return [tensor(x, dtype=B.long) for x in res.values()] diff --git a/RE/paddlemetric/src/paddlemetrics/utilities/distributed.py b/RE/paddlemetric/src/paddlemetrics/utilities/distributed.py new file mode 100644 index 00000000..aec42872 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/utilities/distributed.py @@ -0,0 +1,145 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, List, Optional + +import paddleext.torchapi as B +#import torchapi.nn.functional as F +from paddleext.torchapi import Tensor + + +def reduce(to_reduce: Tensor, reduction: str) -> Tensor: + """Reduces a given tensor by a given reduction method. 
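+
+    For example (illustrative), ``reduce(B.tensor([1., 2., 3.]), "elementwise_mean")``
+    returns a scalar tensor holding ``2.0``, ``reduction="sum"`` returns ``6.0``, and
+    ``reduction="none"`` returns the input unchanged.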
+ + Args: + to_reduce: the tensor, which shall be reduced + reduction: a string specifying the reduction method ('elementwise_mean', 'none', 'sum') + + Return: + reduced Tensor + + Raise: + ValueError if an invalid reduction parameter was given + """ + if reduction == "elementwise_mean": + return B.mean(to_reduce) + if reduction == "none": + return to_reduce + if reduction == "sum": + return B.sum(to_reduce) + raise ValueError("Reduction parameter unknown.") + + +def class_reduce(num: Tensor, denom: Tensor, weights: Tensor, class_reduction: str = "none") -> Tensor: + """ + Function used to reduce classification metrics of the form `num / denom * weights`. + For example for calculating standard accuracy the num would be number of + true positives per class, denom would be the support per class, and weights + would be a tensor of 1s + + Args: + num: numerator tensor + denom: denominator tensor + weights: weights for each class + class_reduction: reduction method for multiclass problems + + - ``'micro'``: calculate metrics globally (default) + - ``'macro'``: calculate metrics for each label, and find their unweighted mean. + - ``'weighted'``: calculate metrics for each label, and find their weighted mean. + - ``'none'`` or ``None``: returns calculated metric per class + + Raises: + ValueError: + If ``class_reduction`` is none of ``"micro"``, ``"macro"``, ``"weighted"``, ``"none"`` or ``None``. + + """ + valid_reduction = ("micro", "macro", "weighted", "none", None) + if class_reduction == "micro": + fraction = B.sum(num) / B.sum(denom) + else: + fraction = num / denom + + # We need to take care of instances where the denom can be 0 + # for some (or all) classes which will produce nans + fraction[fraction != fraction] = 0 + + if class_reduction == "micro": + return fraction + if class_reduction == "macro": + return B.mean(fraction) + if class_reduction == "weighted": + return B.sum(fraction * (weights.float() / B.sum(weights))) + if class_reduction == "none" or class_reduction is None: + return fraction + + raise ValueError( + f"Reduction parameter {class_reduction} unknown." f" Choose between one of these: {valid_reduction}" + ) + + +def _simple_gather_all_tensors(result: Tensor, group: Any, world_size: int) -> List[Tensor]: + gathered_result = [B.zeros_like(result) for _ in range(world_size)] + B.distributed.all_gather(gathered_result, result, group) + return gathered_result + + +def gather_all_tensors(result: Tensor, group: Optional[Any] = None) -> List[Tensor]: + """Function to gather all tensors from several ddp processes onto a list that is broadcasted to all processes. + Works on tensors that have the same number of dimensions, but where each dimension may differ. In this case + tensors are padded, gathered and then trimmed to secure equal workload for all processes. + + Args: + result: the value to sync + group: the process group to gather results from. Defaults to all processes (world) + + Return: + gathered_result: list with size equal to the process group where + gathered_result[i] corresponds to result tensor from process i + """ + if group is None: + group = B.distributed.group.WORLD + + # convert tensors to contiguous format + result = result.contiguous() + + world_size = B.distributed.get_world_size(group) + B.distributed.barrier(group=group) + + # if the tensor is scalar, things are easy + if result.ndim == 0: + return _simple_gather_all_tensors(result, group, world_size) + + # 1. 
Gather sizes of all tensors + local_size = B.tensor(result.shape, device=result.device) + local_sizes = [B.zeros_like(local_size) for _ in range(world_size)] + B.distributed.all_gather(local_sizes, local_size, group=group) + max_size = B.stack(local_sizes).max(dim=0).values + all_sizes_equal = all(all(ls == max_size) for ls in local_sizes) + + # 2. If shapes are all the same, then do a simple gather: + if all_sizes_equal: + return _simple_gather_all_tensors(result, group, world_size) + + # 3. If not, we need to pad each local tensor to maximum size, gather and then truncate + pad_dims = [] + pad_by = (max_size - local_size).detach().cpu() + for val in reversed(pad_by): + pad_dims.append(0) + pad_dims.append(val.item()) + result_padded = B.pad(result, pad_dims) + gathered_result = [B.zeros_like(result_padded) for _ in range(world_size)] + B.distributed.all_gather(gathered_result, result_padded, group) + for idx, item_size in enumerate(local_sizes): + slice_param = [slice(dim_size) for dim_size in item_size] + gathered_result[idx] = gathered_result[idx][slice_param] + return gathered_result diff --git a/RE/paddlemetric/src/paddlemetrics/utilities/enums.py b/RE/paddlemetric/src/paddlemetrics/utilities/enums.py new file mode 100644 index 00000000..7476c051 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/utilities/enums.py @@ -0,0 +1,83 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from enum import Enum +from typing import Optional, Union + + +class EnumStr(str, Enum): + """Type of any enumerator with allowed comparison to string invariant to cases. + + Example: + >>> class MyEnum(EnumStr): + ... ABC = 'abc' + >>> MyEnum.from_str('Abc') + + >>> {MyEnum.ABC: 123} + {: 123} + """ + + @classmethod + def from_str(cls, value: str) -> Optional["EnumStr"]: + statuses = [status for status in dir(cls) if not status.startswith("_")] + for st in statuses: + if st.lower() == value.lower(): + return getattr(cls, st) + return None + + def __eq__(self, other: Union[str, "EnumStr", None]) -> bool: # type: ignore + other = other.value if isinstance(other, Enum) else str(other) + return self.value.lower() == other.lower() + + def __hash__(self) -> int: + # re-enable hashtable so it can be used as a dict key or in a set + # example: set(LightningEnum) + return hash(self.name) + + +class DataType(EnumStr): + """Enum to represent data type. + + >>> "Binary" in list(DataType) + True + """ + + BINARY = "binary" + MULTILABEL = "multi-label" + MULTICLASS = "multi-class" + MULTIDIM_MULTICLASS = "multi-dim multi-class" + + +class AverageMethod(EnumStr): + """Enum to represent average method. 
+ + >>> None in list(AverageMethod) + True + >>> AverageMethod.NONE == None + True + >>> AverageMethod.NONE == 'none' + True + """ + + MICRO = "micro" + MACRO = "macro" + WEIGHTED = "weighted" + NONE = None + SAMPLES = "samples" + + +class MDMCAverageMethod(EnumStr): + """Enum to represent multi-dim multi-class average method.""" + + GLOBAL = "global" + SAMPLEWISE = "samplewise" diff --git a/RE/paddlemetric/src/paddlemetrics/utilities/exceptions.py b/RE/paddlemetric/src/paddlemetrics/utilities/exceptions.py new file mode 100644 index 00000000..767fe901 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/utilities/exceptions.py @@ -0,0 +1,17 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class paddlemetricsUserError(Exception): + """Error used to inform users of a wrong combinison of Metric API calls.""" diff --git a/RE/paddlemetric/src/paddlemetrics/utilities/imports.py b/RE/paddlemetric/src/paddlemetrics/utilities/imports.py new file mode 100644 index 00000000..f3794801 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/utilities/imports.py @@ -0,0 +1,90 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Import utilities.""" +import operator +from importlib import import_module +from importlib.util import find_spec +from typing import Callable, Optional + +from packaging.version import Version +from pkg_resources import DistributionNotFound, get_distribution + + +def _module_available(module_path: str) -> bool: + """Check if a path is available in your environment. + + >>> _module_available('os') + True + >>> _module_available('bla.bla') + False + """ + try: + return find_spec(module_path) is not None + except AttributeError: + # Python 3.6 + return False + except ModuleNotFoundError: + # Python 3.7+ + return False + + +def _compare_version(package: str, op: Callable, version: str) -> Optional[bool]: + """Compare package version with some requirements. 
+ + >>> import operator + >>> _compare_version("torch", operator.ge, "0.1") + True + >>> _compare_version("any_module", operator.ge, "0.0") # is None + """ + if not _module_available(package): + return None + try: + pkg = import_module(package) + pkg_version = pkg.__version__ # type: ignore + except (ModuleNotFoundError, DistributionNotFound): + return None + except AttributeError: + pkg_version = get_distribution(package).version + except ImportError: + # catches cyclic imports - the case with integrated libs + # see: https://stackoverflow.com/a/32965521 + pkg_version = get_distribution(package).version + try: + pkg_version = Version(pkg_version) + except TypeError: + # this is mock by sphinx, so it shall return True ro generate all summaries + return True + return op(pkg_version, Version(version)) + + +_TORCH_LOWER_1_4: Optional[bool] = False +_TORCH_LOWER_1_5: Optional[bool] = False +_TORCH_LOWER_1_6: Optional[bool] = False +_TORCH_GREATER_EQUAL_1_6: Optional[bool] = True +_TORCH_GREATER_EQUAL_1_7: Optional[bool] = True + +_LIGHTNING_AVAILABLE: bool = False +_JIWER_AVAILABLE: bool = _module_available("jiwer") +_NLTK_AVAILABLE: bool = _module_available("nltk") +_ROUGE_SCORE_AVAILABLE: bool = _module_available("rouge_score") +_BERTSCORE_AVAILABLE: bool = _module_available("bert_score") +_SCIPY_AVAILABLE: bool = _module_available("scipy") +_TORCH_FIDELITY_AVAILABLE: bool = _module_available("torch_fidelity") +_LPIPS_AVAILABLE: bool = _module_available("lpips") +_TQDM_AVAILABLE: bool = _module_available("tqdm") +_TRANSFORMERS_AVAILABLE: bool = _module_available("transformers") +_PESQ_AVAILABLE: bool = _module_available("pesq") +_SACREBLEU_AVAILABLE: bool = _module_available("sacrebleu") +_REGEX_AVAILABLE: bool = _module_available("regex") +_PYSTOI_AVAILABLE: bool = _module_available("pystoi") diff --git a/RE/paddlemetric/src/paddlemetrics/utilities/prints.py b/RE/paddlemetric/src/paddlemetrics/utilities/prints.py new file mode 100644 index 00000000..ff4b1b35 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/utilities/prints.py @@ -0,0 +1,49 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
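+# A minimal sketch of how the availability flags defined in utilities/imports.py are
+# meant to be consumed elsewhere in the package (compare the `_REGEX_AVAILABLE` check
+# guarding the "intl" tokenizer of the SacreBLEU metric): evaluate the flag at import
+# time and use it as a guard, e.g.
+#
+#     from paddlemetrics.utilities.imports import _NLTK_AVAILABLE
+#     if not _NLTK_AVAILABLE:
+#         raise ModuleNotFoundError("`nltk` is required for this feature")
+#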
+import os +import warnings +from functools import wraps +from typing import Any, Callable + +from paddlemetrics import _logger as log + + +def rank_zero_only(fn: Callable) -> Callable: + @wraps(fn) + def wrapped_fn(*args: Any, **kwargs: Any) -> Any: + if rank_zero_only.rank == 0: # type: ignore + return fn(*args, **kwargs) + + return wrapped_fn + + +# add the attribute to the function but don't overwrite in case Trainer has already set it +rank_zero_only.rank = getattr(rank_zero_only, "rank", int(os.environ.get("LOCAL_RANK", 0))) # type: ignore + + +def _warn(*args: Any, **kwargs: Any) -> None: + warnings.warn(*args, **kwargs) + + +def _info(*args: Any, **kwargs: Any) -> None: + log.info(*args, **kwargs) + + +def _debug(*args: Any, **kwargs: Any) -> None: + log.debug(*args, **kwargs) + + +rank_zero_debug = rank_zero_only(_debug) +rank_zero_info = rank_zero_only(_info) +rank_zero_warn = rank_zero_only(_warn) diff --git a/RE/paddlemetric/src/paddlemetrics/wrappers/__init__.py b/RE/paddlemetric/src/paddlemetrics/wrappers/__init__.py new file mode 100644 index 00000000..d74928f6 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/wrappers/__init__.py @@ -0,0 +1,16 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddlemetrics.wrappers.bootstrapping import BootStrapper # noqa: F401 +from paddlemetrics.wrappers.multioutput import MultioutputWrapper # noqa: F401 +from paddlemetrics.wrappers.tracker import MetricTracker # noqa: F401 diff --git a/RE/paddlemetric/src/paddlemetrics/wrappers/bootstrapping.py b/RE/paddlemetric/src/paddlemetrics/wrappers/bootstrapping.py new file mode 100644 index 00000000..6a3e7b16 --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/wrappers/bootstrapping.py @@ -0,0 +1,173 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
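+# A minimal usage sketch for the rank-zero logging helpers defined in
+# utilities/prints.py: any process may call them, but only the process whose
+# LOCAL_RANK is 0 actually emits the message, e.g.
+#
+#     from paddlemetrics.utilities import rank_zero_warn
+#     rank_zero_warn("emitted once, even when running under distributed training")
+#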
+from copy import deepcopy +from typing import Any, Callable, Dict, Optional, Union + +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, nn + +from paddlemetrics.metric import Metric +from paddlemetrics.utilities import apply_to_collection +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_7 + + +def _bootstrap_sampler( + size: int, + sampling_strategy: str = "poisson", +) -> Tensor: + """Resample a tensor along its first dimension with replacement + Args: + size: number of samples + sampling_strategy: the strategy to use for sampling, either ``'poisson'`` or ``'multinomial'`` + generator: a instance of ``B.Generator`` that controls the sampling + + Returns: + resampled tensor + + """ + if sampling_strategy == "poisson": + p = B.distributions.Poisson(1) + n = p.sample((size,)) + return B.arange(size).repeat_interleave(n.long(), dim=0) + if sampling_strategy == "multinomial": + idx = B.multinomial(B.ones(size), num_samples=size, replacement=True) + return idx + raise ValueError("Unknown sampling strategy") + + +class BootStrapper(Metric): + def __init__( + self, + base_metric: Metric, + num_bootstraps: int = 10, + mean: bool = True, + std: bool = True, + quantile: Optional[Union[float, Tensor]] = None, + raw: bool = False, + sampling_strategy: str = "poisson", + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ) -> None: + r""" + Using `Turn a Metric into a Bootstrapped`_ + That can automate the process of getting confidence intervals for metric values. This wrapper + class basically keeps multiple copies of the same base metric in memory and whenever ``update`` or + ``forward`` is called, all input tensors are resampled (with replacement) along the first dimension. + + Args: + base_metric: + base metric class to wrap + num_bootstraps: + number of copies to make of the base metric for bootstrapping + mean: + if ``True`` return the mean of the bootstraps + std: + if ``True`` return the standard diviation of the bootstraps + quantile: + if given, returns the quantile of the bootstraps. Can only be used with + pytorch version 1.6 or higher + raw: + if ``True``, return all bootstrapped values + sampling_strategy: + Determines how to produce bootstrapped samplings. Either ``'poisson'`` or ``multinomial``. + If ``'possion'`` is chosen, the number of times each sample will be included in the bootstrap + will be given by :math:`n\sim Poisson(\lambda=1)`, which approximates the true bootstrap distribution + when the number of samples is large. If ``'multinomial'`` is chosen, we will apply true bootstrapping + at the batch level to approximate bootstrapping over the hole dataset. + compute_on_step: + Forward only calls ``update()`` and return ``None`` if this is set to ``False``. + dist_sync_on_step: + Synchronize metric state across processes at each ``forward()`` + before returning the value at the step + process_group: + Specify the process group on which synchronization is called. + default: ``None`` (which selects the entire world) + dist_sync_fn: + Callback that performs the allgather operation on the metric state. When ``None``, DDP + will be used to perform the allgather. 
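+
+        Note:
+            A sketch of what the resampling amounts to: with ``sampling_strategy='poisson'``
+            each sample index ``i`` is repeated :math:`n_i \sim Poisson(\lambda=1)` times (so
+            some samples are dropped and others duplicated), whereas ``'multinomial'`` draws
+            the same number of indices uniformly at random with replacement.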
+ + Example:: + >>> from pprint import pprint + >>> from paddlemetrics import Accuracy, BootStrapper + >>> _ = B.manual_seed(123) + >>> base_metric = Accuracy() + >>> bootstrap = BootStrapper(base_metric, num_bootstraps=20) + >>> bootstrap.update(B.randint(5, (20,)), B.randint(5, (20,))) + >>> output = bootstrap.compute() + >>> pprint(output) + {'mean': tensor(0.2205), 'std': tensor(0.0859)} + + """ + super().__init__(compute_on_step, dist_sync_on_step, process_group, dist_sync_fn) + if not isinstance(base_metric, Metric): + raise ValueError( + "Expected base metric to be an instance of paddlemetrics.Metric" f" but received {base_metric}" + ) + + self.metrics = nn.ModuleList([deepcopy(base_metric) for _ in range(num_bootstraps)]) + self.num_bootstraps = num_bootstraps + + self.mean = mean + self.std = std + if quantile is not None and not _TORCH_GREATER_EQUAL_1_7: + raise ValueError("quantile argument can only be used with pytorch v1.7 or higher") + self.quantile = quantile + self.raw = raw + + allowed_sampling = ("poisson", "multinomial") + if sampling_strategy not in allowed_sampling: + raise ValueError( + f"Expected argument ``sampling_strategy`` to be one of {allowed_sampling}" + f" but recieved {sampling_strategy}" + ) + self.sampling_strategy = sampling_strategy + + def update(self, *args: Any, **kwargs: Any) -> None: + """Updates the state of the base metric. + + Any tensor passed in will be bootstrapped along dimension 0 + """ + for idx in range(self.num_bootstraps): + args_sizes = apply_to_collection(args, Tensor, len) + kwargs_sizes = list(apply_to_collection(kwargs, Tensor, len)) + if len(args_sizes) > 0: + size = args_sizes[0] + elif len(kwargs_sizes) > 0: + size = kwargs_sizes[0] + else: + raise ValueError("None of the input contained tensors, so could not determine the sampling size") + sample_idx = _bootstrap_sampler(size, sampling_strategy=self.sampling_strategy).to(self.device) + new_args = apply_to_collection(args, Tensor, B.index_select, dim=0, index=sample_idx) + new_kwargs = apply_to_collection(kwargs, Tensor, B.index_select, dim=0, index=sample_idx) + self.metrics[idx].update(*new_args, **new_kwargs) + + def compute(self) -> Dict[str, Tensor]: + """Computes the bootstrapped metric values. 
+ + Allways returns a dict of tensors, which can contain the following keys: ``mean``, ``std``, ``quantile`` and + ``raw`` depending on how the class was initialized + """ + computed_vals = B.stack([m.compute() for m in self.metrics], dim=0) + output_dict = {} + if self.mean: + output_dict["mean"] = computed_vals.mean(dim=0) + if self.std: + output_dict["std"] = computed_vals.std(dim=0) + if self.quantile is not None: + output_dict["quantile"] = B.quantile(computed_vals, self.quantile) + if self.raw: + output_dict["raw"] = computed_vals + return output_dict diff --git a/RE/paddlemetric/src/paddlemetrics/wrappers/multioutput.py b/RE/paddlemetric/src/paddlemetrics/wrappers/multioutput.py new file mode 100644 index 00000000..789445be --- /dev/null +++ b/RE/paddlemetric/src/paddlemetrics/wrappers/multioutput.py @@ -0,0 +1,165 @@ +from copy import deepcopy +from typing import Any, Callable, List, Optional, Tuple + +import paddleext.torchapi as B +from paddleext.torchapi import nn + +from paddlemetrics import Metric +from paddlemetrics.utilities import apply_to_collection + + +def _get_nan_indices(*tensors: B.Tensor) -> B.Tensor: + """Get indices of rows along dim 0 which have NaN values.""" + if len(tensors) == 0: + raise ValueError("Must pass at least one tensor as argument") + sentinel = tensors[0] + nan_idxs = B.zeros(len(sentinel), dtype=B.bool, device=sentinel.device) + for tensor in tensors: + permuted_tensor = tensor.flatten(start_dim=1) + nan_idxs |= B.any(B.isnan(permuted_tensor), dim=1) + return nan_idxs + + +class MultioutputWrapper(Metric): + """Wrap a base metric to enable it to support multiple outputs. + + Several paddlemetrics metrics, such as :class:`paddlemetrics.regression.spearman.SpearmanCorrcoef` lack support for + multioutput mode. This class wraps such metrics to support computing one metric per output. + Unlike specific torchmetric metrics, it doesn't support any aggregation across outputs. + This means if you set `num_outputs` to 2, `compute()` will return a Tensor of dimension + (2, ...) where ... represents the dimensions the metric returns when not wrapped. + + In addition to enabling multioutput support for metrics that lack it, this class also supports, albeit in a crude + fashion, dealing with missing labels (or other data). When ``remove_nans`` is passed, the class will remove the + intersection of NaN containing "rows" upon each update for each output. For example, suppose a user uses + `MultioutputWrapper` to wrap :class:`paddlemetrics.regression.r2.R2Score` with 2 outputs, one of which occasionally + has missing labels for classes like ``R2Score`` is that this class supports removing NaN values + (parameter ``remove_nans``) on a per-output basis. When ``remove_nans`` is passed the wrapper will remove all rows + + Args: + base_metric: + Metric being wrapped. + num_outputs: + Expected dimensionality of the output dimension. This parameter is + used to determine the number of distinct metrics we need to track. + output_dim: + Dimension on which output is expected. Note that while this provides some flexibility, the output dimension + must be the same for all inputs to update. This applies even for metrics such as `Accuracy` where the labels + can have a different number of dimensions than the predictions. This can be worked around if the output + dimension can be set to -1 for both, even if -1 corresponds to different dimensions in different inputs. 
+ remove_nans: + Whether to remove the intersection of rows containing NaNs from the values passed through to each underlying + metric. Proper operation requires all tensors passed to update to have dimension `(N, ...)` where N + represents the length of the batch or dataset being passed in. + squeeze_outputs: + If true, will squeeze the 1-item dimensions left after `index_select` is applied. + This is sometimes unnecessary but harmless for metrics such as `R2Score` but useful + for certain classification metrics that can't handle additional 1-item dimensions. + compute_on_step: + Whether to recompute the metric value on each update step. + dist_sync_on_step: + Required for distributed training support. + process_group: + Specify the process group on which synchronization is called. + The default: None (which selects the entire world) + dist_sync_fn: + Required for distributed training support. + + Example: + + >>> # Mimic R2Score in `multioutput`, `raw_values` mode: + >>> import torchapi as B + >>> from paddlemetrics import MultioutputWrapper, R2Score + >>> target = B.tensor([[0.5, 1], [-1, 1], [7, -6]]) + >>> preds = B.tensor([[0, 2], [-1, 2], [8, -5]]) + >>> r2score = MultioutputWrapper(R2Score(), 2) + >>> r2score(preds, target) + [tensor(0.9654), tensor(0.9082)] + >>> # Classification metric where prediction and label tensors have different shapes. + >>> from paddlemetrics import BinnedAveragePrecision + >>> target = B.tensor([[1, 2], [2, 0], [1, 2]]) + >>> preds = B.tensor([ + ... [[.1, .8], [.8, .05], [.1, .15]], + ... [[.1, .1], [.2, .3], [.7, .6]], + ... [[.002, .4], [.95, .45], [.048, .15]] + ... ]) + >>> binned_avg_precision = MultioutputWrapper(BinnedAveragePrecision(3, thresholds=5), 2) + >>> binned_avg_precision(preds, target) + [[tensor(-0.), tensor(1.0000), tensor(1.0000)], [tensor(0.3333), tensor(-0.), tensor(0.6667)]] + """ + + is_differentiable = False + + def __init__( + self, + base_metric: Metric, + num_outputs: int, + output_dim: int = -1, + remove_nans: bool = True, + squeeze_outputs: bool = True, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + dist_sync_fn: Callable = None, + ): + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + self.metrics = nn.ModuleList([deepcopy(base_metric) for _ in range(num_outputs)]) + self.output_dim = output_dim + self.remove_nans = remove_nans + self.squeeze_outputs = squeeze_outputs + + def _get_args_kwargs_by_output( + self, *args: B.Tensor, **kwargs: B.Tensor + ) -> List[Tuple[B.Tensor, B.Tensor]]: + """Get args and kwargs reshaped to be output-specific and (maybe) having NaNs stripped out.""" + args_kwargs_by_output = [] + for i in range(len(self.metrics)): + selected_args = apply_to_collection( + args, B.Tensor, B.index_select, dim=self.output_dim, index=B.tensor(i, device=self.device) + ) + selected_kwargs = apply_to_collection( + kwargs, B.Tensor, B.index_select, dim=self.output_dim, index=B.tensor(i, device=self.device) + ) + if self.remove_nans: + args_kwargs = selected_args + tuple(selected_kwargs.values()) + nan_idxs = _get_nan_indices(*args_kwargs) + selected_args = [arg[~nan_idxs] for arg in selected_args] + selected_kwargs = {k: v[~nan_idxs] for k, v in selected_kwargs.items()} + + if self.squeeze_outputs: + selected_args = [arg.squeeze(self.output_dim) for arg in selected_args] + args_kwargs_by_output.append((selected_args, selected_kwargs)) + return 
+
+    def update(self, *args: Any, **kwargs: Any) -> None:
+        """Update each underlying metric with the corresponding output."""
+        reshaped_args_kwargs = self._get_args_kwargs_by_output(*args, **kwargs)
+        for metric, (selected_args, selected_kwargs) in zip(self.metrics, reshaped_args_kwargs):
+            metric.update(*selected_args, **selected_kwargs)
+
+    def compute(self) -> List[B.Tensor]:
+        """Compute metrics."""
+        return [m.compute() for m in self.metrics]
+
+    def forward(self, *args: Any, **kwargs: Any) -> Any:
+        """Call underlying forward methods and aggregate the results if they're non-null.
+
+        We override this method to ensure that state variables get copied over on the underlying metrics.
+        """
+        results = []
+        reshaped_args_kwargs = self._get_args_kwargs_by_output(*args, **kwargs)
+        for metric, (selected_args, selected_kwargs) in zip(self.metrics, reshaped_args_kwargs):
+            results.append(metric(*selected_args, **selected_kwargs))
+        if results[0] is None:
+            return None
+        return results
+
+    def reset(self) -> None:
+        """Reset all underlying metrics."""
+        for metric in self.metrics:
+            metric.reset()
diff --git a/RE/paddlemetric/src/paddlemetrics/wrappers/tracker.py b/RE/paddlemetric/src/paddlemetrics/wrappers/tracker.py
new file mode 100644
index 00000000..b2b939d9
--- /dev/null
+++ b/RE/paddlemetric/src/paddlemetrics/wrappers/tracker.py
@@ -0,0 +1,127 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from copy import deepcopy
+from typing import Any, Tuple, Union
+
+import paddleext.torchapi as B
+from paddleext.torchapi import Tensor, nn
+
+from paddlemetrics.metric import Metric
+
+
+class MetricTracker(nn.ModuleList):
+    """A wrapper class that keeps track of a metric over time and implements several useful methods. The wrapper
+    implements the standard `update`, `compute` and `reset` methods, which simply call the corresponding method of
+    the currently tracked metric. However, the following additional methods are provided:
+
+    -``MetricTracker.n_steps``: number of metrics being tracked
+
+    -``MetricTracker.increment()``: initialize a new metric for being tracked
+
+    -``MetricTracker.compute_all()``: get the metric value for all steps
+
+    -``MetricTracker.best_metric()``: returns the best metric value tracked so far
+
+    Args:
+        metric: instance of a modular (class-based) paddlemetrics metric to keep track of at each timestep.
+        maximize: bool indicating whether higher metric values are better (`True`) or lower
+            values are better (`False`)
+
+    Example:
+
+        >>> from paddlemetrics import Accuracy, MetricTracker
+        >>> _ = B.manual_seed(42)
+        >>> tracker = MetricTracker(Accuracy(num_classes=10))
+        >>> for epoch in range(5):
+        ...     tracker.increment()
+        ...     for batch_idx in range(5):
+        ...         preds, target = B.randint(10, (100,)), B.randint(10, (100,))
+        ...         tracker.update(preds, target)
+        ...     print(f"current acc={tracker.compute()}")  # doctest: +NORMALIZE_WHITESPACE
+        current acc=0.1120000034570694
+        current acc=0.08799999952316284
+        current acc=0.12600000202655792
+        current acc=0.07999999821186066
+        current acc=0.10199999809265137
+        >>> best_acc, which_epoch = tracker.best_metric(return_step=True)
+        >>> tracker.compute_all()
+        tensor([0.1120, 0.0880, 0.1260, 0.0800, 0.1020])
+    """

+
+    def __init__(self, metric: Metric, maximize: bool = True) -> None:
+        super().__init__()
+        if not isinstance(metric, Metric):
+            raise TypeError("metric arg needs to be an instance of a paddlemetrics metric" f" but got {metric}")
+        self._base_metric = metric
+        self.maximize = maximize
+
+        self._increment_called = False
+
+    @property
+    def n_steps(self) -> int:
+        """Returns the number of times the tracker has been incremented."""
+        return len(self) - 1  # subtract the base metric
+
+    def increment(self) -> None:
+        """Creates a new instance of the input metric that will be updated next."""
+        self._increment_called = True
+        self.append(deepcopy(self._base_metric))
+
+    def forward(self, *args, **kwargs) -> None:  # type: ignore
+        """Calls forward of the current metric being tracked."""
+        self._check_for_increment("forward")
+        return self[-1](*args, **kwargs)
+
+    def update(self, *args, **kwargs) -> None:  # type: ignore
+        """Updates the current metric being tracked."""
+        self._check_for_increment("update")
+        self[-1].update(*args, **kwargs)
+
+    def compute(self) -> Any:
+        """Call compute of the current metric being tracked."""
+        self._check_for_increment("compute")
+        return self[-1].compute()
+
+    def compute_all(self) -> Tensor:
+        """Compute the metric value for all tracked metrics."""
+        self._check_for_increment("compute_all")
+        return B.stack([metric.compute() for i, metric in enumerate(self) if i != 0], dim=0)
+
+    def reset(self) -> None:
+        """Resets the current metric being tracked."""
+        self[-1].reset()
+
+    def reset_all(self) -> None:
+        """Resets all metrics being tracked."""
+        for metric in self:
+            metric.reset()
+
+    def best_metric(self, return_step: bool = False) -> Union[float, Tuple[int, float]]:
+        """Returns the best metric value out of all tracked (the highest if ``maximize=True``, else the lowest).
+
+        Args:
+            return_step: If `True`, also return the step at which the best metric value was observed.
+
+        Returns:
+            The best metric value, and optionally the step at which it was reached.
+        """
+        fn = B.max if self.maximize else B.min
+        idx, max = fn(self.compute_all(), 0)
+        if return_step:
+            return idx.item(), max.item()
+        return max.item()
+
+    def _check_for_increment(self, method: str) -> None:
+        if not self._increment_called:
+            raise ValueError(f"`{method}` cannot be called before `.increment()` has been called")
diff --git a/RE/paddlemetric/src/setup.py b/RE/paddlemetric/src/setup.py
new file mode 100644
index 00000000..8d7dd2d0
--- /dev/null
+++ b/RE/paddlemetric/src/setup.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+################################################################################
+#
+# Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
+#
+################################################################################
+"""
+Setup script.
+ +Authors: sunmingming01(sunmingming01@baidu.com) +Date: 2020/12/31 12:33:34 +""" + +from setuptools import setup, find_packages + +with open('README.md') as readme_file: + README = readme_file.read() + +setup_args = dict( + name='paddlemetrics', + version='1.0.0-beta', + description='Metrics library for paddle, porting from torch metrics.', + long_description_content_type="text/markdown", + long_description=README, + license='Apache', + packages=find_packages(include=["paddlemetrics", "paddlemetrics.*"]), + author='Mingming Sun', + author_email='sunmingming01@baidu.com', + keywords=['Deep Learning', 'Paddlepaddle'], + url='', + download_url='' +) + +install_requires = [ +] + +if __name__ == '__main__': + setup(**setup_args, install_requires=install_requires) \ No newline at end of file diff --git a/RE/paddlemetric/src/tests/__init__.py b/RE/paddlemetric/src/tests/__init__.py new file mode 100644 index 00000000..b56a9064 --- /dev/null +++ b/RE/paddlemetric/src/tests/__init__.py @@ -0,0 +1 @@ +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, NUM_PROCESSES, DummyMetric, MetricTester # noqa: F401 diff --git a/RE/paddlemetric/src/tests/audio/__init__.py b/RE/paddlemetric/src/tests/audio/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/audio/examples/audio_speech.wav b/RE/paddlemetric/src/tests/audio/examples/audio_speech.wav new file mode 100644 index 00000000..0fa4e9e7 Binary files /dev/null and b/RE/paddlemetric/src/tests/audio/examples/audio_speech.wav differ diff --git a/RE/paddlemetric/src/tests/audio/examples/audio_speech_bab_0dB.wav b/RE/paddlemetric/src/tests/audio/examples/audio_speech_bab_0dB.wav new file mode 100644 index 00000000..1bed1071 Binary files /dev/null and b/RE/paddlemetric/src/tests/audio/examples/audio_speech_bab_0dB.wav differ diff --git a/RE/paddlemetric/src/tests/audio/test_pesq.py b/RE/paddlemetric/src/tests/audio/test_pesq.py new file mode 100644 index 00000000..33ca39c3 --- /dev/null +++ b/RE/paddlemetric/src/tests/audio/test_pesq.py @@ -0,0 +1,138 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
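+# Checks the PESQ metric (module `PESQ` and functional `pesq`) against the reference `pesq`
+# package on random batches and on the bundled example wav files.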
+from collections import namedtuple +from functools import partial + +import pytest +import paddleext.torchapi as B +from pesq import pesq as pesq_backend +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.helpers.testers import MetricTester +from paddlemetrics.audio import PESQ +from paddlemetrics.functional import pesq +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_6 + +seed_all(42) + +Input = namedtuple("Input", ["preds", "target"]) + +# for 8k sample rate, need at least 8k/4=2000 samples +inputs_8k = Input( + preds=B.rand(2, 3, 2100), + target=B.rand(2, 3, 2100), +) +# for 16k sample rate, need at least 16k/4=4000 samples +inputs_16k = Input( + preds=B.rand(2, 3, 4100), + target=B.rand(2, 3, 4100), +) + + +def pesq_original_batch(preds: Tensor, target: Tensor, fs: int, mode: str): + # shape: preds [BATCH_SIZE, Time] , target [BATCH_SIZE, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, Time] , target [NUM_BATCHES*BATCH_SIZE, Time] + target = target.detach().cpu().numpy() + preds = preds.detach().cpu().numpy() + mss = [] + for b in range(preds.shape[0]): + pesq_val = pesq_backend(fs, target[b, ...], preds[b, ...], mode) + mss.append(pesq_val) + return B.tensor(mss) + + +def average_metric(preds, target, metric_func): + # shape: preds [BATCH_SIZE, 1, Time] , target [BATCH_SIZE, 1, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, 1, Time] , target [NUM_BATCHES*BATCH_SIZE, 1, Time] + return metric_func(preds, target).mean() + + +pesq_original_batch_8k_nb = partial(pesq_original_batch, fs=8000, mode="nb") +pesq_original_batch_16k_nb = partial(pesq_original_batch, fs=16000, mode="nb") +pesq_original_batch_16k_wb = partial(pesq_original_batch, fs=16000, mode="wb") + + +@pytest.mark.parametrize( + "preds, target, sk_metric, fs, mode", + [ + (inputs_8k.preds, inputs_8k.target, pesq_original_batch_8k_nb, 8000, "nb"), + (inputs_16k.preds, inputs_16k.target, pesq_original_batch_16k_nb, 16000, "nb"), + (inputs_16k.preds, inputs_16k.target, pesq_original_batch_16k_wb, 16000, "wb"), + ], +) +class TestPESQ(MetricTester): + atol = 1e-2 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_pesq(self, preds, target, sk_metric, fs, mode, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + PESQ, + sk_metric=partial(average_metric, metric_func=sk_metric), + dist_sync_on_step=dist_sync_on_step, + metric_args=dict(fs=fs, mode=mode), + ) + + def test_pesq_functional(self, preds, target, sk_metric, fs, mode): + self.run_functional_metric_test( + preds, + target, + pesq, + sk_metric, + metric_args=dict(fs=fs, mode=mode), + ) + + def test_pesq_differentiability(self, preds, target, sk_metric, fs, mode): + self.run_differentiability_test( + preds=preds, target=target, metric_module=PESQ, metric_functional=pesq, metric_args=dict(fs=fs, mode=mode) + ) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_6, reason="half support of core operations on not support before pytorch v1.6" + ) + def test_pesq_half_cpu(self, preds, target, sk_metric, fs, mode): + pytest.xfail("PESQ metric does not support cpu + half precision") + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_pesq_half_gpu(self, preds, target, sk_metric, fs, mode): + self.run_precision_test_gpu( + preds=preds, + target=target, + metric_module=PESQ, + metric_functional=partial(pesq, fs=fs, mode=mode), + metric_args=dict(fs=fs, mode=mode), + ) + + +def 
test_error_on_different_shape(metric_class=PESQ): + metric = metric_class(16000, "nb") + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) + + +def test_on_real_audio(): + import os + + from scipy.io import wavfile + + current_file_dir = os.path.dirname(__file__) + + rate, ref = wavfile.read(os.path.join(current_file_dir, "examples/audio_speech.wav")) + rate, deg = wavfile.read(os.path.join(current_file_dir, "examples/audio_speech_bab_0dB.wav")) + assert pesq(B.from_numpy(deg), B.from_numpy(ref), rate, "wb") == 1.0832337141036987 + assert pesq(B.from_numpy(deg), B.from_numpy(ref), rate, "nb") == 1.6072081327438354 diff --git a/RE/paddlemetric/src/tests/audio/test_pit.py b/RE/paddlemetric/src/tests/audio/test_pit.py new file mode 100644 index 00000000..54d2b5ac --- /dev/null +++ b/RE/paddlemetric/src/tests/audio/test_pit.py @@ -0,0 +1,196 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple +from functools import partial +from typing import Callable, Tuple + +import numpy as np +import pytest +import paddleext.torchapi as B +from scipy.optimize import linear_sum_assignment +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.audio import PIT +from paddlemetrics.functional import pit, si_sdr, snr +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_6 + +seed_all(42) + +TIME = 10 + +Input = namedtuple("Input", ["preds", "target"]) + +# three speaker examples to test _find_best_perm_by_linear_sum_assignment +inputs1 = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, 3, TIME), + target=B.rand(NUM_BATCHES, BATCH_SIZE, 3, TIME), +) +# two speaker examples to test _find_best_perm_by_exhuastive_method +inputs2 = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, 2, TIME), + target=B.rand(NUM_BATCHES, BATCH_SIZE, 2, TIME), +) + + +def naive_implementation_pit_scipy( + preds: Tensor, + target: Tensor, + metric_func: Callable, + eval_func: str, +) -> Tuple[Tensor, Tensor]: + """A naive implementation of `Permutation Invariant Training` based on Scipy. 
+ + Args: + preds: predictions, shape[batch, spk, time] + target: targets, shape[batch, spk, time] + metric_func: which metric + eval_func: min or max + + Returns: + best_metric: + shape [batch] + best_perm: + shape [batch, spk] + """ + batch_size, spk_num = target.shape[0:2] + metric_mtx = B.empty((batch_size, spk_num, spk_num), device=target.device) + for t in range(spk_num): + for e in range(spk_num): + metric_mtx[:, t, e] = metric_func(preds[:, e, ...], target[:, t, ...]) + + # pit_r = PIT(metric_func, eval_func)(preds, target) + metric_mtx = metric_mtx.detach().cpu().numpy() + best_metrics = [] + best_perms = [] + for b in range(batch_size): + row_idx, col_idx = linear_sum_assignment(metric_mtx[b, ...], eval_func == "max") + best_metrics.append(metric_mtx[b, row_idx, col_idx].mean()) + best_perms.append(col_idx) + return B.from_numpy(np.stack(best_metrics)), B.from_numpy(np.stack(best_perms)) + + +def _average_metric(preds: Tensor, target: Tensor, metric_func: Callable) -> Tensor: + """average the metric values. + + Args: + preds: predictions, shape[batch, spk, time] + target: targets, shape[batch, spk, time] + metric_func: a function which return best_metric and best_perm + + Returns: + the average of best_metric + """ + return metric_func(preds, target)[0].mean() + + +snr_pit_scipy = partial(naive_implementation_pit_scipy, metric_func=snr, eval_func="max") +si_sdr_pit_scipy = partial(naive_implementation_pit_scipy, metric_func=si_sdr, eval_func="max") + + +@pytest.mark.parametrize( + "preds, target, sk_metric, metric_func, eval_func", + [ + (inputs1.preds, inputs1.target, snr_pit_scipy, snr, "max"), + (inputs1.preds, inputs1.target, si_sdr_pit_scipy, si_sdr, "max"), + (inputs2.preds, inputs2.target, snr_pit_scipy, snr, "max"), + (inputs2.preds, inputs2.target, si_sdr_pit_scipy, si_sdr, "max"), + ], +) +class TestPIT(MetricTester): + atol = 1e-2 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_pit(self, preds, target, sk_metric, metric_func, eval_func, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + PIT, + sk_metric=partial(_average_metric, metric_func=sk_metric), + dist_sync_on_step=dist_sync_on_step, + metric_args=dict(metric_func=metric_func, eval_func=eval_func), + ) + + def test_pit_functional(self, preds, target, sk_metric, metric_func, eval_func): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=pit, + sk_metric=sk_metric, + metric_args=dict(metric_func=metric_func, eval_func=eval_func), + ) + + def test_pit_differentiability(self, preds, target, sk_metric, metric_func, eval_func): + def pit_diff(preds, target, metric_func, eval_func): + return pit(preds, target, metric_func, eval_func)[0] + + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=PIT, + metric_functional=pit_diff, + metric_args={"metric_func": metric_func, "eval_func": eval_func}, + ) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_6, reason="half support of core operations on not support before pytorch v1.6" + ) + def test_pit_half_cpu(self, preds, target, sk_metric, metric_func, eval_func): + pytest.xfail("PIT metric does not support cpu + half precision") + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_pit_half_gpu(self, preds, target, sk_metric, metric_func, eval_func): + self.run_precision_test_gpu( + preds=preds, + target=target, + metric_module=PIT, + metric_functional=partial(pit, 
metric_func=metric_func, eval_func=eval_func), + metric_args={"metric_func": metric_func, "eval_func": eval_func}, + ) + + +def test_error_on_different_shape() -> None: + metric = PIT(snr, "max") + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(3, 3, 10), B.randn(3, 2, 10)) + + +def test_error_on_wrong_eval_func() -> None: + metric = PIT(snr, "xxx") + with pytest.raises(ValueError, match='eval_func can only be "max" or "min"'): + metric(B.randn(3, 3, 10), B.randn(3, 3, 10)) + + +def test_error_on_wrong_shape() -> None: + metric = PIT(snr, "max") + with pytest.raises(ValueError, match="Inputs must be of shape *"): + metric(B.randn(3), B.randn(3)) + + +def test_consistency_of_two_implementations() -> None: + from paddlemetrics.functional.audio.pit import ( + _find_best_perm_by_exhuastive_method, + _find_best_perm_by_linear_sum_assignment, + ) + + shapes_test = [(5, 2, 2), (4, 3, 3), (4, 4, 4), (3, 5, 5)] + for shp in shapes_test: + metric_mtx = B.randn(size=shp) + bm1, bp1 = _find_best_perm_by_linear_sum_assignment(metric_mtx, B.max) + bm2, bp2 = _find_best_perm_by_exhuastive_method(metric_mtx, B.max) + assert B.allclose(bm1, bm2) + assert (bp1 == bp2).all() diff --git a/RE/paddlemetric/src/tests/audio/test_si_sdr.py b/RE/paddlemetric/src/tests/audio/test_si_sdr.py new file mode 100644 index 00000000..f7647b49 --- /dev/null +++ b/RE/paddlemetric/src/tests/audio/test_si_sdr.py @@ -0,0 +1,131 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
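+# Checks the SI_SDR metric (module `SI_SDR` and functional `si_sdr`) against the `speechmetrics`
+# "sisdr" reference, with and without zero-mean normalization.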
+from collections import namedtuple +from functools import partial + +import pytest +import speechmetrics +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.audio import SI_SDR +from paddlemetrics.functional import si_sdr +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_6 + +seed_all(42) + +Time = 100 + +Input = namedtuple("Input", ["preds", "target"]) + +inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, 1, Time), + target=B.rand(NUM_BATCHES, BATCH_SIZE, 1, Time), +) + +speechmetrics_sisdr = speechmetrics.load("sisdr") + + +def speechmetrics_si_sdr(preds: Tensor, target: Tensor, zero_mean: bool): + # shape: preds [BATCH_SIZE, 1, Time] , target [BATCH_SIZE, 1, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, 1, Time] , target [NUM_BATCHES*BATCH_SIZE, 1, Time] + if zero_mean: + preds = preds - preds.mean(dim=2, keepdim=True) + target = target - target.mean(dim=2, keepdim=True) + target = target.detach().cpu().numpy() + preds = preds.detach().cpu().numpy() + mss = [] + for i in range(preds.shape[0]): + ms = [] + for j in range(preds.shape[1]): + metric = speechmetrics_sisdr(preds[i, j], target[i, j], rate=16000) + ms.append(metric["sisdr"][0]) + mss.append(ms) + return B.tensor(mss) + + +def average_metric(preds, target, metric_func): + # shape: preds [BATCH_SIZE, 1, Time] , target [BATCH_SIZE, 1, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, 1, Time] , target [NUM_BATCHES*BATCH_SIZE, 1, Time] + return metric_func(preds, target).mean() + + +speechmetrics_si_sdr_zero_mean = partial(speechmetrics_si_sdr, zero_mean=True) +speechmetrics_si_sdr_no_zero_mean = partial(speechmetrics_si_sdr, zero_mean=False) + + +@pytest.mark.parametrize( + "preds, target, sk_metric, zero_mean", + [ + (inputs.preds, inputs.target, speechmetrics_si_sdr_zero_mean, True), + (inputs.preds, inputs.target, speechmetrics_si_sdr_no_zero_mean, False), + ], +) +class TestSISDR(MetricTester): + atol = 1e-2 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_si_sdr(self, preds, target, sk_metric, zero_mean, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + SI_SDR, + sk_metric=partial(average_metric, metric_func=sk_metric), + dist_sync_on_step=dist_sync_on_step, + metric_args=dict(zero_mean=zero_mean), + ) + + def test_si_sdr_functional(self, preds, target, sk_metric, zero_mean): + self.run_functional_metric_test( + preds, + target, + si_sdr, + sk_metric, + metric_args=dict(zero_mean=zero_mean), + ) + + def test_si_sdr_differentiability(self, preds, target, sk_metric, zero_mean): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=SI_SDR, + metric_functional=si_sdr, + metric_args={"zero_mean": zero_mean}, + ) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_6, reason="half support of core operations on not support before pytorch v1.6" + ) + def test_si_sdr_half_cpu(self, preds, target, sk_metric, zero_mean): + pytest.xfail("SI-SDR metric does not support cpu + half precision") + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_si_sdr_half_gpu(self, preds, target, sk_metric, zero_mean): + self.run_precision_test_gpu( + preds=preds, + target=target, + metric_module=SI_SDR, + metric_functional=si_sdr, + metric_args={"zero_mean": zero_mean}, + ) + + +def 
test_error_on_different_shape(metric_class=SI_SDR): + metric = metric_class() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) diff --git a/RE/paddlemetric/src/tests/audio/test_si_snr.py b/RE/paddlemetric/src/tests/audio/test_si_snr.py new file mode 100644 index 00000000..96745117 --- /dev/null +++ b/RE/paddlemetric/src/tests/audio/test_si_snr.py @@ -0,0 +1,112 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple +from functools import partial + +import pytest +import speechmetrics +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.audio import SI_SNR +from paddlemetrics.functional import si_snr +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_6 + +seed_all(42) + +Time = 100 + +Input = namedtuple("Input", ["preds", "target"]) + +inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, 1, Time), + target=B.rand(NUM_BATCHES, BATCH_SIZE, 1, Time), +) + +speechmetrics_sisdr = speechmetrics.load("sisdr") + + +def speechmetrics_si_sdr(preds: Tensor, target: Tensor, zero_mean: bool = True): + # shape: preds [BATCH_SIZE, 1, Time] , target [BATCH_SIZE, 1, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, 1, Time] , target [NUM_BATCHES*BATCH_SIZE, 1, Time] + if zero_mean: + preds = preds - preds.mean(dim=2, keepdim=True) + target = target - target.mean(dim=2, keepdim=True) + target = target.detach().cpu().numpy() + preds = preds.detach().cpu().numpy() + mss = [] + for i in range(preds.shape[0]): + ms = [] + for j in range(preds.shape[1]): + metric = speechmetrics_sisdr(preds[i, j], target[i, j], rate=16000) + ms.append(metric["sisdr"][0]) + mss.append(ms) + return B.tensor(mss) + + +def average_metric(preds, target, metric_func): + # shape: preds [BATCH_SIZE, 1, Time] , target [BATCH_SIZE, 1, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, 1, Time] , target [NUM_BATCHES*BATCH_SIZE, 1, Time] + return metric_func(preds, target).mean() + + +@pytest.mark.parametrize( + "preds, target, sk_metric", + [ + (inputs.preds, inputs.target, speechmetrics_si_sdr), + ], +) +class TestSISNR(MetricTester): + atol = 1e-2 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_si_snr(self, preds, target, sk_metric, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + SI_SNR, + sk_metric=partial(average_metric, metric_func=sk_metric), + dist_sync_on_step=dist_sync_on_step, + ) + + def test_si_snr_functional(self, preds, target, sk_metric): + self.run_functional_metric_test( + preds, + target, + si_snr, + sk_metric, + ) + + def test_si_snr_differentiability(self, preds, target, sk_metric): + self.run_differentiability_test(preds=preds, target=target, metric_module=SI_SNR, 
metric_functional=si_snr) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_6, reason="half support of core operations on not support before pytorch v1.6" + ) + def test_si_snr_half_cpu(self, preds, target, sk_metric): + pytest.xfail("SI-SNR metric does not support cpu + half precision") + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_si_snr_half_gpu(self, preds, target, sk_metric): + self.run_precision_test_gpu(preds=preds, target=target, metric_module=SI_SNR, metric_functional=si_snr) + + +def test_error_on_different_shape(metric_class=SI_SNR): + metric = metric_class() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) diff --git a/RE/paddlemetric/src/tests/audio/test_snr.py b/RE/paddlemetric/src/tests/audio/test_snr.py new file mode 100644 index 00000000..86d28837 --- /dev/null +++ b/RE/paddlemetric/src/tests/audio/test_snr.py @@ -0,0 +1,125 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple +from functools import partial +from typing import Callable + +import pytest +import paddleext.torchapi as B +from mir_eval.separation import bss_eval_images as mir_eval_bss_eval_images +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.audio import SNR +from paddlemetrics.functional import snr +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_6 + +seed_all(42) + +Time = 100 + +Input = namedtuple("Input", ["preds", "target"]) + +inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, 1, Time), + target=B.rand(NUM_BATCHES, BATCH_SIZE, 1, Time), +) + + +def bss_eval_images_snr(preds: Tensor, target: Tensor, metric_func: Callable, zero_mean: bool): + # shape: preds [BATCH_SIZE, 1, Time] , target [BATCH_SIZE, 1, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, 1, Time] , target [NUM_BATCHES*BATCH_SIZE, 1, Time] + if zero_mean: + target = target - B.mean(target, dim=-1, keepdim=True) + preds = preds - B.mean(preds, dim=-1, keepdim=True) + target = target.detach().cpu().numpy() + preds = preds.detach().cpu().numpy() + mss = [] + for i in range(preds.shape[0]): + ms = [] + for j in range(preds.shape[1]): + if metric_func == mir_eval_bss_eval_images: + snr_v = metric_func([target[i, j]], [preds[i, j]])[0][0] + else: + snr_v = metric_func([target[i, j]], [preds[i, j]])[0][0][0] + ms.append(snr_v) + mss.append(ms) + return B.tensor(mss) + + +def average_metric(preds: Tensor, target: Tensor, metric_func: Callable): + # shape: preds [BATCH_SIZE, 1, Time] , target [BATCH_SIZE, 1, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, 1, Time] , target [NUM_BATCHES*BATCH_SIZE, 1, Time] + return metric_func(preds, target).mean() + + +mireval_snr_zeromean = partial(bss_eval_images_snr, metric_func=mir_eval_bss_eval_images, zero_mean=True) +mireval_snr_nozeromean = 
partial(bss_eval_images_snr, metric_func=mir_eval_bss_eval_images, zero_mean=False) + + +@pytest.mark.parametrize( + "preds, target, sk_metric, zero_mean", + [ + (inputs.preds, inputs.target, mireval_snr_zeromean, True), + (inputs.preds, inputs.target, mireval_snr_nozeromean, False), + ], +) +class TestSNR(MetricTester): + atol = 1e-2 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_snr(self, preds, target, sk_metric, zero_mean, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + SNR, + sk_metric=partial(average_metric, metric_func=sk_metric), + dist_sync_on_step=dist_sync_on_step, + metric_args=dict(zero_mean=zero_mean), + ) + + def test_snr_functional(self, preds, target, sk_metric, zero_mean): + self.run_functional_metric_test( + preds, + target, + snr, + sk_metric, + metric_args=dict(zero_mean=zero_mean), + ) + + def test_snr_differentiability(self, preds, target, sk_metric, zero_mean): + self.run_differentiability_test( + preds=preds, target=target, metric_module=SNR, metric_functional=snr, metric_args={"zero_mean": zero_mean} + ) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_6, reason="half support of core operations on not support before pytorch v1.6" + ) + def test_snr_half_cpu(self, preds, target, sk_metric, zero_mean): + pytest.xfail("SNR metric does not support cpu + half precision") + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_snr_half_gpu(self, preds, target, sk_metric, zero_mean): + self.run_precision_test_gpu( + preds=preds, target=target, metric_module=SNR, metric_functional=snr, metric_args={"zero_mean": zero_mean} + ) + + +def test_error_on_different_shape(metric_class=SNR): + metric = metric_class() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) diff --git a/RE/paddlemetric/src/tests/audio/test_stoi.py b/RE/paddlemetric/src/tests/audio/test_stoi.py new file mode 100644 index 00000000..70c7208b --- /dev/null +++ b/RE/paddlemetric/src/tests/audio/test_stoi.py @@ -0,0 +1,146 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
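+# Checks the STOI metric (module `STOI` and functional `stoi`) against the `pystoi` reference
+# at 8 kHz and 16 kHz, in extended and non-extended mode.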
+from collections import namedtuple +from functools import partial + +import pytest +import paddleext.torchapi as B +from pystoi import stoi as stoi_backend +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.helpers.testers import MetricTester +from paddlemetrics.audio import STOI +from paddlemetrics.functional import stoi +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_6 + +seed_all(42) + +Input = namedtuple("Input", ["preds", "target"]) + +inputs_8k = Input( + preds=B.rand(2, 3, 8000), + target=B.rand(2, 3, 8000), +) +inputs_16k = Input( + preds=B.rand(2, 3, 16000), + target=B.rand(2, 3, 16000), +) + + +def stoi_original_batch(preds: Tensor, target: Tensor, fs: int, extended: bool): + # shape: preds [BATCH_SIZE, Time] , target [BATCH_SIZE, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, Time] , target [NUM_BATCHES*BATCH_SIZE, Time] + target = target.detach().cpu().numpy() + preds = preds.detach().cpu().numpy() + mss = [] + for b in range(preds.shape[0]): + pesq_val = stoi_backend(target[b, ...], preds[b, ...], fs, extended) + mss.append(pesq_val) + return B.tensor(mss) + + +def average_metric(preds, target, metric_func): + # shape: preds [BATCH_SIZE, 1, Time] , target [BATCH_SIZE, 1, Time] + # or shape: preds [NUM_BATCHES*BATCH_SIZE, 1, Time] , target [NUM_BATCHES*BATCH_SIZE, 1, Time] + return metric_func(preds, target).mean() + + +stoi_original_batch_8k_ext = partial(stoi_original_batch, fs=8000, extended=True) +stoi_original_batch_16k_ext = partial(stoi_original_batch, fs=16000, extended=True) +stoi_original_batch_8k_noext = partial(stoi_original_batch, fs=8000, extended=False) +stoi_original_batch_16k_noext = partial(stoi_original_batch, fs=16000, extended=False) + + +@pytest.mark.parametrize( + "preds, target, sk_metric, fs, extended", + [ + (inputs_8k.preds, inputs_8k.target, stoi_original_batch_8k_ext, 8000, True), + (inputs_16k.preds, inputs_16k.target, stoi_original_batch_16k_ext, 16000, True), + (inputs_8k.preds, inputs_8k.target, stoi_original_batch_8k_noext, 8000, False), + (inputs_16k.preds, inputs_16k.target, stoi_original_batch_16k_noext, 16000, False), + ], +) +class TestSTOI(MetricTester): + atol = 1e-2 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_stoi(self, preds, target, sk_metric, fs, extended, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + STOI, + sk_metric=partial(average_metric, metric_func=sk_metric), + dist_sync_on_step=dist_sync_on_step, + metric_args=dict(fs=fs, extended=extended), + ) + + def test_stoi_functional(self, preds, target, sk_metric, fs, extended): + self.run_functional_metric_test( + preds, + target, + stoi, + sk_metric, + metric_args=dict(fs=fs, extended=extended), + ) + + def test_stoi_differentiability(self, preds, target, sk_metric, fs, extended): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=STOI, + metric_functional=stoi, + metric_args=dict(fs=fs, extended=extended), + ) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_6, reason="half support of core operations on not support before pytorch v1.6" + ) + def test_stoi_half_cpu(self, preds, target, sk_metric, fs, extended): + pytest.xfail("STOI metric does not support cpu + half precision") + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_stoi_half_gpu(self, preds, target, sk_metric, fs, extended): + self.run_precision_test_gpu( + 
preds=preds, + target=target, + metric_module=STOI, + metric_functional=partial(stoi, fs=fs, extended=extended), + metric_args=dict(fs=fs, extended=extended), + ) + + +def test_error_on_different_shape(metric_class=STOI): + metric = metric_class(16000) + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) + + +def test_on_real_audio(): + import os + + from scipy.io import wavfile + + current_file_dir = os.path.dirname(__file__) + + rate, ref = wavfile.read(os.path.join(current_file_dir, "examples/audio_speech.wav")) + rate, deg = wavfile.read(os.path.join(current_file_dir, "examples/audio_speech_bab_0dB.wav")) + assert B.allclose( + stoi(B.from_numpy(deg), B.from_numpy(ref), rate).float(), + B.tensor(0.6739177), + rtol=0.0001, + atol=1e-4, + ) diff --git a/RE/paddlemetric/src/tests/bases/__init__.py b/RE/paddlemetric/src/tests/bases/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/bases/test.log b/RE/paddlemetric/src/tests/bases/test.log new file mode 100644 index 00000000..eb8e22c4 --- /dev/null +++ b/RE/paddlemetric/src/tests/bases/test.log @@ -0,0 +1,2764 @@ +============================= test session starts ============================== +platform darwin -- Python 3.8.12, pytest-7.1.2, pluggy-1.0.0 +rootdir: /Users/sun/Projects/oix/baidu/ccl/paddlemetric/src/tests/bases +plugins: hydra-core-1.1.0.dev5 +collected 86 items + +test_composition.py .....FFF.....FF.................FFFFFFFFFFFFFF.F....FFFFF.FFFFFFFFFFFFFFFFFF.F....FF.. + +=================================== FAILURES =================================== +_____________________ test_metrics_and[3-expected_result1] _____________________ + +second_operand = 3 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric(3), tensor(2)), (3, tensor(2)), (3, tensor(2)), (tensor(3), tensor(2))], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_and(second_operand, expected_result): + first_metric = DummyMetric(2) + + final_and = first_metric & second_operand + final_rand = second_operand & first_metric + + assert isinstance(final_and, CompositionalMetric) + assert isinstance(final_rand, CompositionalMetric) + + final_and.update() + final_rand.update() +> assert B.allclose(expected_result, final_and.compute()) + +test_composition.py:83: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) +../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/logic.py:559: in bitwise_and + return _bitwise_op( +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +op_name = 'bitwise_and' +x = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +y = 3, out = None, name = None, binary_op = True + + def _bitwise_op(op_name, x, y, out=None, name=None, binary_op=True): + if paddle.in_dynamic_mode(): + op = getattr(_C_ops, op_name) + if binary_op: +> return op(x, y) +E ValueError: (InvalidArgument) bitwise_and(): argument 'Y' (position 1) must be Tensor, but got int (at 
/Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:737) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/logic.py:505: ValueError +_____________________ test_metrics_and[3-expected_result2] _____________________ + +second_operand = 3 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric(3), tensor(2)), (3, tensor(2)), (3, tensor(2)), (tensor(3), tensor(2))], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_and(second_operand, expected_result): + first_metric = DummyMetric(2) + + final_and = first_metric & second_operand + final_rand = second_operand & first_metric + + assert isinstance(final_and, CompositionalMetric) + assert isinstance(final_rand, CompositionalMetric) + + final_and.update() + final_rand.update() +> assert B.allclose(expected_result, final_and.compute()) + +test_composition.py:83: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) +../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/logic.py:559: in bitwise_and + return _bitwise_op( +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +op_name = 'bitwise_and' +x = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +y = 3, out = None, name = None, binary_op = True + + def _bitwise_op(op_name, x, y, out=None, name=None, binary_op=True): + if paddle.in_dynamic_mode(): + op = getattr(_C_ops, op_name) + if binary_op: +> return op(x, y) +E ValueError: (InvalidArgument) bitwise_and(): argument 'Y' (position 1) must be Tensor, but got int (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:737) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/logic.py:505: ValueError +______________ test_metrics_and[second_operand3-expected_result3] ______________ + +second_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 3) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric(3), tensor(2)), (3, tensor(2)), (3, tensor(2)), (tensor(3), tensor(2))], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_and(second_operand, expected_result): + first_metric = DummyMetric(2) + + final_and = first_metric & second_operand +> final_rand = second_operand & first_metric + +test_composition.py:76: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../../../../../torch2paddle/paddleext/torchapi/tensor_.py:361: in __and__ + return paddle.logical_or(self.bool(), other.bool()) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = DummyMetric(), name = 'bool' + + def __getattr__(self, name): + if '_parameters' in self.__dict__: + _parameters = self.__dict__['_parameters'] + if name in self._parameters: + if in_declarative_mode(): + return _convert_into_variable(self._parameters[name]) + return self._parameters[name] + if '_sub_layers' in self.__dict__: + _sub_layers = self.__dict__['_sub_layers'] + if 
name in self._sub_layers: + return self._sub_layers[name] + if '_buffers' in self.__dict__: + _buffers = self.__dict__['_buffers'] + if name in _buffers: + if in_declarative_mode(): + return _convert_into_variable(_buffers[name]) + return _buffers[name] +> return object.__getattribute__(self, name) +E AttributeError: 'DummyMetric' object has no attribute 'bool' + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/layers.py:1123: AttributeError +__________________ test_metrics_floordiv[2-expected_result1] ___________________ + +second_operand = 2 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(2)), + (2, tensor(2)), + (2.0, tensor(2.0)), + (tensor(2), tensor(2)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_floordiv(second_operand, expected_result): + first_metric = DummyMetric(5) + + final_floordiv = first_metric // second_operand + + assert isinstance(final_floordiv, CompositionalMetric) + + final_floordiv.update() +> assert B.allclose(expected_result, final_floordiv.compute()) + +test_composition.py:126: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) +../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:460: in floor_divide + return _elementwise_op_in_dygraph( +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/decorator.py:232: in fun + return caller(func, *(extras + args), **kw) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/wrapped_decorator.py:25: in __impl__ + return wrapped_func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/framework.py:434: in __impl__ + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 5) +y = 2, axis = -1, act = None, use_mkldnn = False +op_name = 'elementwise_floordiv' + + @dygraph_only + def _elementwise_op_in_dygraph(x, + y, + axis=-1, + act=None, + use_mkldnn=False, + op_name=None): + def is_inplace(op_name): + return op_name[-1] == "_" + + if op_name not in OP_NAMEMAPPING.keys(): + op = getattr(_C_ops, op_name) + out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) + else: + if in_dygraph_mode(): + op = getattr(_C_ops, OP_NAMEMAPPING[op_name] if not is_inplace(op_name) else op_name) + out = op(x, y) + + if _in_legacy_dygraph(): + op = getattr(_C_ops, op_name) +> out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) +E ValueError: (InvalidArgument) elementwise_floordiv(): argument 'Y' (position 1) must be Tensor, but got int (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:737) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:218: ValueError +_________________ test_metrics_floordiv[2.0-expected_result2] __________________ + +second_operand = 2.0 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) 
+ + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(2)), + (2, tensor(2)), + (2.0, tensor(2.0)), + (tensor(2), tensor(2)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_floordiv(second_operand, expected_result): + first_metric = DummyMetric(5) + + final_floordiv = first_metric // second_operand + + assert isinstance(final_floordiv, CompositionalMetric) + + final_floordiv.update() +> assert B.allclose(expected_result, final_floordiv.compute()) + +test_composition.py:126: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) +../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:460: in floor_divide + return _elementwise_op_in_dygraph( +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/decorator.py:232: in fun + return caller(func, *(extras + args), **kw) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/wrapped_decorator.py:25: in __impl__ + return wrapped_func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/framework.py:434: in __impl__ + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 5) +y = 2.0, axis = -1, act = None, use_mkldnn = False +op_name = 'elementwise_floordiv' + + @dygraph_only + def _elementwise_op_in_dygraph(x, + y, + axis=-1, + act=None, + use_mkldnn=False, + op_name=None): + def is_inplace(op_name): + return op_name[-1] == "_" + + if op_name not in OP_NAMEMAPPING.keys(): + op = getattr(_C_ops, op_name) + out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) + else: + if in_dygraph_mode(): + op = getattr(_C_ops, OP_NAMEMAPPING[op_name] if not is_inplace(op_name) else op_name) + out = op(x, y) + + if _in_legacy_dygraph(): + op = getattr(_C_ops, op_name) +> out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) +E ValueError: (InvalidArgument) elementwise_floordiv(): argument 'Y' (position 1) must be Tensor, but got float (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:737) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:218: ValueError +____________ test_metrics_matmul[second_operand0-expected_result0] _____________ + +second_operand = DummyMetric() +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 12) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric([2, 2, 2]), tensor(12)), (tensor([2, 2, 2]), tensor(12))], + ) + def test_metrics_matmul(second_operand, expected_result): + first_metric = DummyMetric([2, 2, 2]) + + final_matmul = first_metric @ second_operand + + assert isinstance(final_matmul, CompositionalMetric) + + final_matmul.update() +> assert B.allclose(expected_result, final_matmul.compute()) + +test_composition.py:225: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) 
+../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [2, 2, 2]) +y = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [2, 2, 2]) +transpose_x = False, transpose_y = False, name = None + + def matmul(x, y, transpose_x=False, transpose_y=False, name=None): + """ + Applies matrix multiplication to two tensors. `matmul` follows + the complete broadcast rules, + and its behavior is consistent with `np.matmul`. + + Currently, the input tensors' number of dimensions can be any, `matmul` can be used to + achieve the `dot`, `matmul` and `batchmatmul`. + + The actual behavior depends on the shapes of :math:`x`, :math:`y` and the + flag values of :attr:`transpose_x`, :attr:`transpose_y`. Specifically: + + - If a transpose flag is specified, the last two dimensions of the tensor + are transposed. If the tensor is ndim-1 of shape, the transpose is invalid. If the tensor + is ndim-1 of shape :math:`[D]`, then for :math:`x` it is treated as :math:`[1, D]`, whereas + for :math:`y` it is the opposite: It is treated as :math:`[D, 1]`. + + The multiplication behavior depends on the dimensions of `x` and `y`. Specifically: + + - If both tensors are 1-dimensional, the dot product result is obtained. + + - If both tensors are 2-dimensional, the matrix-matrix product is obtained. + + - If the `x` is 1-dimensional and the `y` is 2-dimensional, + a `1` is prepended to its dimension in order to conduct the matrix multiply. + After the matrix multiply, the prepended dimension is removed. + + - If the `x` is 2-dimensional and `y` is 1-dimensional, + the matrix-vector product is obtained. + + - If both arguments are at least 1-dimensional and at least one argument + is N-dimensional (where N > 2), then a batched matrix multiply is obtained. + If the first argument is 1-dimensional, a 1 is prepended to its dimension + in order to conduct the batched matrix multiply and removed after. + If the second argument is 1-dimensional, a 1 is appended to its + dimension for the purpose of the batched matrix multiple and removed after. + The non-matrix (exclude the last two dimensions) dimensions are + broadcasted according the broadcast rule. + For example, if input is a (j, 1, n, m) tensor and the other is a (k, m, p) tensor, + out will be a (j, k, n, p) tensor. + + Args: + x (Tensor): The input tensor which is a Tensor. + y (Tensor): The input tensor which is a Tensor. + transpose_x (bool): Whether to transpose :math:`x` before multiplication. + transpose_y (bool): Whether to transpose :math:`y` before multiplication. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + Tensor: The output Tensor. + + Examples: + + .. 
code-block:: python + + import paddle + import numpy as np + + # vector * vector + x_data = np.random.random([10]).astype(np.float32) + y_data = np.random.random([10]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [1] + + # matrix * vector + x_data = np.random.random([10, 5]).astype(np.float32) + y_data = np.random.random([5]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10] + + # batched matrix * broadcasted vector + x_data = np.random.random([10, 5, 2]).astype(np.float32) + y_data = np.random.random([2]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10, 5] + + # batched matrix * batched matrix + x_data = np.random.random([10, 5, 2]).astype(np.float32) + y_data = np.random.random([10, 2, 5]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10, 5, 5] + + # batched matrix * broadcasted matrix + x_data = np.random.random([10, 1, 5, 2]).astype(np.float32) + y_data = np.random.random([1, 3, 2, 5]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10, 3, 5, 5] + + """ + if in_dygraph_mode(): + return _C_ops.final_state_matmul(x, y, transpose_x, transpose_y) + + if _in_legacy_dygraph(): + op_type = 'matmul_v2' + op = getattr(_C_ops, op_type) +> return op(x, y, 'trans_x', transpose_x, 'trans_y', transpose_y) +E RuntimeError: (NotFound) There are no kernels which are registered in the matmul_v2 operator. +E [Hint: Expected kernels_iter != all_op_kernels.end(), but received kernels_iter == all_op_kernels.end().] 
(at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/imperative/prepared_operator.cc:327) +E [operator < matmul_v2 > error] + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/linalg.py:145: RuntimeError +____________ test_metrics_matmul[second_operand1-expected_result1] _____________ + +second_operand = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [2, 2, 2]) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 12) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric([2, 2, 2]), tensor(12)), (tensor([2, 2, 2]), tensor(12))], + ) + def test_metrics_matmul(second_operand, expected_result): + first_metric = DummyMetric([2, 2, 2]) + + final_matmul = first_metric @ second_operand + + assert isinstance(final_matmul, CompositionalMetric) + + final_matmul.update() +> assert B.allclose(expected_result, final_matmul.compute()) + +test_composition.py:225: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) +../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [2, 2, 2]) +y = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [2, 2, 2]) +transpose_x = False, transpose_y = False, name = None + + def matmul(x, y, transpose_x=False, transpose_y=False, name=None): + """ + Applies matrix multiplication to two tensors. `matmul` follows + the complete broadcast rules, + and its behavior is consistent with `np.matmul`. + + Currently, the input tensors' number of dimensions can be any, `matmul` can be used to + achieve the `dot`, `matmul` and `batchmatmul`. + + The actual behavior depends on the shapes of :math:`x`, :math:`y` and the + flag values of :attr:`transpose_x`, :attr:`transpose_y`. Specifically: + + - If a transpose flag is specified, the last two dimensions of the tensor + are transposed. If the tensor is ndim-1 of shape, the transpose is invalid. If the tensor + is ndim-1 of shape :math:`[D]`, then for :math:`x` it is treated as :math:`[1, D]`, whereas + for :math:`y` it is the opposite: It is treated as :math:`[D, 1]`. + + The multiplication behavior depends on the dimensions of `x` and `y`. Specifically: + + - If both tensors are 1-dimensional, the dot product result is obtained. + + - If both tensors are 2-dimensional, the matrix-matrix product is obtained. + + - If the `x` is 1-dimensional and the `y` is 2-dimensional, + a `1` is prepended to its dimension in order to conduct the matrix multiply. + After the matrix multiply, the prepended dimension is removed. + + - If the `x` is 2-dimensional and `y` is 1-dimensional, + the matrix-vector product is obtained. + + - If both arguments are at least 1-dimensional and at least one argument + is N-dimensional (where N > 2), then a batched matrix multiply is obtained. + If the first argument is 1-dimensional, a 1 is prepended to its dimension + in order to conduct the batched matrix multiply and removed after. + If the second argument is 1-dimensional, a 1 is appended to its + dimension for the purpose of the batched matrix multiple and removed after. 
+ The non-matrix (exclude the last two dimensions) dimensions are + broadcasted according the broadcast rule. + For example, if input is a (j, 1, n, m) tensor and the other is a (k, m, p) tensor, + out will be a (j, k, n, p) tensor. + + Args: + x (Tensor): The input tensor which is a Tensor. + y (Tensor): The input tensor which is a Tensor. + transpose_x (bool): Whether to transpose :math:`x` before multiplication. + transpose_y (bool): Whether to transpose :math:`y` before multiplication. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + Tensor: The output Tensor. + + Examples: + + .. code-block:: python + + import paddle + import numpy as np + + # vector * vector + x_data = np.random.random([10]).astype(np.float32) + y_data = np.random.random([10]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [1] + + # matrix * vector + x_data = np.random.random([10, 5]).astype(np.float32) + y_data = np.random.random([5]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10] + + # batched matrix * broadcasted vector + x_data = np.random.random([10, 5, 2]).astype(np.float32) + y_data = np.random.random([2]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10, 5] + + # batched matrix * batched matrix + x_data = np.random.random([10, 5, 2]).astype(np.float32) + y_data = np.random.random([10, 2, 5]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10, 5, 5] + + # batched matrix * broadcasted matrix + x_data = np.random.random([10, 1, 5, 2]).astype(np.float32) + y_data = np.random.random([1, 3, 2, 5]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10, 3, 5, 5] + + """ + if in_dygraph_mode(): + return _C_ops.final_state_matmul(x, y, transpose_x, transpose_y) + + if _in_legacy_dygraph(): + op_type = 'matmul_v2' + op = getattr(_C_ops, op_type) +> return op(x, y, 'trans_x', transpose_x, 'trans_y', transpose_y) +E RuntimeError: (NotFound) There are no kernels which are registered in the matmul_v2 operator. +E [Hint: Expected kernels_iter != all_op_kernels.end(), but received kernels_iter == all_op_kernels.end().] 
(at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/imperative/prepared_operator.cc:327) +E [operator < matmul_v2 > error] + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/linalg.py:145: RuntimeError +______________ test_metrics_mod[second_operand0-expected_result0] ______________ + +second_operand = DummyMetric() +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1)), + (tensor(2), tensor(1)), + ], + ) + def test_metrics_mod(second_operand, expected_result): + first_metric = DummyMetric(5) + +> final_mod = first_metric % second_operand + +test_composition.py:240: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:616: in __mod__ + return CompositionalMetric(B.fmod, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('fmod',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'fmod' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +_____________________ test_metrics_mod[2-expected_result1] _____________________ + +second_operand = 2 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1)), + (tensor(2), tensor(1)), + ], + ) + def test_metrics_mod(second_operand, expected_result): + first_metric = DummyMetric(5) + +> final_mod = first_metric % second_operand + +test_composition.py:240: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:616: in __mod__ + return CompositionalMetric(B.fmod, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('fmod',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'fmod' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +____________________ test_metrics_mod[2.0-expected_result2] ____________________ + +second_operand = 2.0 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1)), + (tensor(2), tensor(1)), + ], + ) + def test_metrics_mod(second_operand, expected_result): + first_metric = DummyMetric(5) + +> final_mod = first_metric % second_operand + +test_composition.py:240: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:616: in __mod__ + return CompositionalMetric(B.fmod, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('fmod',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'fmod' + 
+../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +______________ test_metrics_mod[second_operand3-expected_result3] ______________ + +second_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1)), + (tensor(2), tensor(1)), + ], + ) + def test_metrics_mod(second_operand, expected_result): + first_metric = DummyMetric(5) + +> final_mod = first_metric % second_operand + +test_composition.py:240: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:616: in __mod__ + return CompositionalMetric(B.fmod, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('fmod',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'fmod' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +______________ test_metrics_mul[second_operand0-expected_result0] ______________ + +second_operand = DummyMetric() +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 4) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(4)), + (2, tensor(4)), + (2.0, tensor(4.0)), + pytest.param(tensor(2), tensor(4), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], + ) + def test_metrics_mul(second_operand, expected_result): + first_metric = DummyMetric(2) + +> final_mul = first_metric * second_operand + +test_composition.py:261: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:619: in __mul__ + return CompositionalMetric(B.mul, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('mul',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'mul' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +_____________________ test_metrics_mul[2-expected_result1] _____________________ + +second_operand = 2 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 4) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(4)), + (2, tensor(4)), + (2.0, tensor(4.0)), + pytest.param(tensor(2), tensor(4), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], + ) + def test_metrics_mul(second_operand, expected_result): + first_metric = DummyMetric(2) + +> final_mul = first_metric * second_operand + +test_composition.py:261: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:619: in __mul__ + return CompositionalMetric(B.mul, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('mul',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' 
object has no attribute 'mul' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +____________________ test_metrics_mul[2.0-expected_result2] ____________________ + +second_operand = 2.0 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 4.) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(4)), + (2, tensor(4)), + (2.0, tensor(4.0)), + pytest.param(tensor(2), tensor(4), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], + ) + def test_metrics_mul(second_operand, expected_result): + first_metric = DummyMetric(2) + +> final_mul = first_metric * second_operand + +test_composition.py:261: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:619: in __mul__ + return CompositionalMetric(B.mul, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('mul',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'mul' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +______________ test_metrics_mul[second_operand3-expected_result3] ______________ + +second_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 4) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(4)), + (2, tensor(4)), + (2.0, tensor(4.0)), + pytest.param(tensor(2), tensor(4), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], + ) + def test_metrics_mul(second_operand, expected_result): + first_metric = DummyMetric(2) + +> final_mul = first_metric * second_operand + +test_composition.py:261: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:619: in __mul__ + return CompositionalMetric(B.mul, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('mul',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'mul' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +______________ test_metrics_ne[second_operand0-expected_result0] _______________ + +second_operand = DummyMetric() +expected_result = Tensor(shape=[], dtype=bool, place=Place(cpu), stop_gradient=True, + False) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(False)), + (2, tensor(False)), + (2.0, tensor(False)), + (tensor(2), tensor(False)), + ], + ) + def test_metrics_ne(second_operand, expected_result): + first_metric = DummyMetric(2) + +> final_ne = first_metric != second_operand + +test_composition.py:285: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:623: in __ne__ + return CompositionalMetric(B.ne, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('ne',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = 
object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'ne' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +_____________________ test_metrics_ne[2-expected_result1] ______________________ + +second_operand = 2 +expected_result = Tensor(shape=[], dtype=bool, place=Place(cpu), stop_gradient=True, + False) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(False)), + (2, tensor(False)), + (2.0, tensor(False)), + (tensor(2), tensor(False)), + ], + ) + def test_metrics_ne(second_operand, expected_result): + first_metric = DummyMetric(2) + +> final_ne = first_metric != second_operand + +test_composition.py:285: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:623: in __ne__ + return CompositionalMetric(B.ne, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('ne',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'ne' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +____________________ test_metrics_ne[2.0-expected_result2] _____________________ + +second_operand = 2.0 +expected_result = Tensor(shape=[], dtype=bool, place=Place(cpu), stop_gradient=True, + False) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(False)), + (2, tensor(False)), + (2.0, tensor(False)), + (tensor(2), tensor(False)), + ], + ) + def test_metrics_ne(second_operand, expected_result): + first_metric = DummyMetric(2) + +> final_ne = first_metric != second_operand + +test_composition.py:285: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:623: in __ne__ + return CompositionalMetric(B.ne, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('ne',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'ne' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +______________ test_metrics_ne[second_operand3-expected_result3] _______________ + +second_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +expected_result = Tensor(shape=[], dtype=bool, place=Place(cpu), stop_gradient=True, + False) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(False)), + (2, tensor(False)), + (2.0, tensor(False)), + (tensor(2), tensor(False)), + ], + ) + def test_metrics_ne(second_operand, expected_result): + first_metric = DummyMetric(2) + +> final_ne = first_metric != second_operand + +test_composition.py:285: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:623: in __ne__ + return CompositionalMetric(B.ne, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('ne',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E 
AttributeError: 'module' object has no attribute 'ne' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +______________ test_metrics_or[second_operand1-expected_result1] _______________ + +second_operand = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [1, 0, 3]) +expected_result = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [-1, -2, 3]) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric([1, 0, 3]), tensor([-1, -2, 3])), (tensor([1, 0, 3]), tensor([-1, -2, 3]))], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_or(second_operand, expected_result): + first_metric = DummyMetric([-1, -2, 3]) + + final_or = first_metric | second_operand +> final_ror = second_operand | first_metric + +test_composition.py:303: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../../../../../torch2paddle/paddleext/torchapi/tensor_.py:357: in __or__ + return paddle.logical_or(self.bool(), other.bool()) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = DummyMetric(), name = 'bool' + + def __getattr__(self, name): + if '_parameters' in self.__dict__: + _parameters = self.__dict__['_parameters'] + if name in self._parameters: + if in_declarative_mode(): + return _convert_into_variable(self._parameters[name]) + return self._parameters[name] + if '_sub_layers' in self.__dict__: + _sub_layers = self.__dict__['_sub_layers'] + if name in self._sub_layers: + return self._sub_layers[name] + if '_buffers' in self.__dict__: + _buffers = self.__dict__['_buffers'] + if name in _buffers: + if in_declarative_mode(): + return _convert_into_variable(_buffers[name]) + return _buffers[name] +> return object.__getattribute__(self, name) +E AttributeError: 'DummyMetric' object has no attribute 'bool' + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/layers.py:1123: AttributeError +__________________ test_metrics_rfloordiv[5-expected_result0] __________________ + +first_operand = 5 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [(5, tensor(2)), (5.0, tensor(2.0)), (tensor(5), tensor(2))], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_rfloordiv(first_operand, expected_result): + second_operand = DummyMetric(2) + + final_rfloordiv = first_operand // second_operand + + assert isinstance(final_rfloordiv, CompositionalMetric) + + final_rfloordiv.update() +> assert B.allclose(expected_result, final_rfloordiv.compute()) + +test_composition.py:347: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) +../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:460: in floor_divide + return _elementwise_op_in_dygraph( +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/decorator.py:232: in fun + return caller(func, *(extras + args), **kw) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/wrapped_decorator.py:25: in __impl__ + return wrapped_func(*args, **kwargs) 
+/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/framework.py:434: in __impl__ + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = 5 +y = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +axis = -1, act = None, use_mkldnn = False, op_name = 'elementwise_floordiv' + + @dygraph_only + def _elementwise_op_in_dygraph(x, + y, + axis=-1, + act=None, + use_mkldnn=False, + op_name=None): + def is_inplace(op_name): + return op_name[-1] == "_" + + if op_name not in OP_NAMEMAPPING.keys(): + op = getattr(_C_ops, op_name) + out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) + else: + if in_dygraph_mode(): + op = getattr(_C_ops, OP_NAMEMAPPING[op_name] if not is_inplace(op_name) else op_name) + out = op(x, y) + + if _in_legacy_dygraph(): + op = getattr(_C_ops, op_name) +> out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) +E ValueError: (InvalidArgument) elementwise_floordiv(): argument 'X' (position 0) must be Tensor, but got int (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:737) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:218: ValueError +_________________ test_metrics_rfloordiv[5.0-expected_result1] _________________ + +first_operand = 5.0 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [(5, tensor(2)), (5.0, tensor(2.0)), (tensor(5), tensor(2))], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_rfloordiv(first_operand, expected_result): + second_operand = DummyMetric(2) + + final_rfloordiv = first_operand // second_operand + + assert isinstance(final_rfloordiv, CompositionalMetric) + + final_rfloordiv.update() +> assert B.allclose(expected_result, final_rfloordiv.compute()) + +test_composition.py:347: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) +../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:460: in floor_divide + return _elementwise_op_in_dygraph( +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/decorator.py:232: in fun + return caller(func, *(extras + args), **kw) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/wrapped_decorator.py:25: in __impl__ + return wrapped_func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/framework.py:434: in __impl__ + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = 5.0 +y = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +axis = -1, act = None, use_mkldnn = False, op_name = 'elementwise_floordiv' + + @dygraph_only + def _elementwise_op_in_dygraph(x, + y, + axis=-1, + act=None, + use_mkldnn=False, + op_name=None): + def is_inplace(op_name): + return op_name[-1] == "_" + + if op_name not in OP_NAMEMAPPING.keys(): + op = getattr(_C_ops, op_name) + out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) + else: + if in_dygraph_mode(): + op = getattr(_C_ops, OP_NAMEMAPPING[op_name] if not is_inplace(op_name) else op_name) + out = 
op(x, y) + + if _in_legacy_dygraph(): + op = getattr(_C_ops, op_name) +> out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) +E ValueError: (InvalidArgument) elementwise_floordiv(): argument 'X' (position 0) must be Tensor, but got float (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:737) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:218: ValueError +___________ test_metrics_rfloordiv[first_operand2-expected_result2] ____________ + +first_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 5) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [(5, tensor(2)), (5.0, tensor(2.0)), (tensor(5), tensor(2))], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_rfloordiv(first_operand, expected_result): + second_operand = DummyMetric(2) + +> final_rfloordiv = first_operand // second_operand + +test_composition.py:342: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:257: in __impl__ + other_var = create_scalar( +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:90: in create_scalar + return create_tensor(value, dtype, shape=[1]) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/decorator.py:232: in fun + return caller(func, *(extras + args), **kw) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/base.py:299: in __impl__ + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +value = DummyMetric(), dtype = paddle.int32, shape = [1] + + @no_grad + def create_tensor(value, dtype, shape): + out = _varbase_creator(dtype=dtype) +> out = _C_ops.fill_constant(out, 'dtype', dtype, 'shape', shape, 'value', + value, 'force_cpu', False) +E ValueError: (InvalidArgument) fill_constant(): argument (position 6) must be float, but got DummyMetric (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:189) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:84: ValueError +____________ test_metrics_rmatmul[first_operand0-expected_result0] _____________ + +first_operand = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [2, 2, 2]) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 12) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [pytest.param(tensor([2, 2, 2]), tensor(12), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4))], + ) + def test_metrics_rmatmul(first_operand, expected_result): + second_operand = DummyMetric([2, 2, 2]) + +> final_rmatmul = first_operand @ second_operand + +test_composition.py:357: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:257: in __impl__ + other_var = create_scalar( +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:90: in create_scalar + return create_tensor(value, dtype, shape=[1]) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/decorator.py:232: in fun + return caller(func, *(extras + args), **kw) 
+/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/base.py:299: in __impl__ + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +value = DummyMetric(), dtype = paddle.int64, shape = [1] + + @no_grad + def create_tensor(value, dtype, shape): + out = _varbase_creator(dtype=dtype) +> out = _C_ops.fill_constant(out, 'dtype', dtype, 'shape', shape, 'value', + value, 'force_cpu', False) +E ValueError: (InvalidArgument) fill_constant(): argument (position 6) must be float, but got DummyMetric (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:189) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:84: ValueError +______________ test_metrics_rmod[first_operand0-expected_result0] ______________ + +first_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [pytest.param(tensor(2), tensor(2), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4))], + ) + def test_metrics_rmod(first_operand, expected_result): + second_operand = DummyMetric(5) + +> final_rmod = first_operand % second_operand + +test_composition.py:372: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:257: in __impl__ + other_var = create_scalar( +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:90: in create_scalar + return create_tensor(value, dtype, shape=[1]) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/decorator.py:232: in fun + return caller(func, *(extras + args), **kw) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/base.py:299: in __impl__ + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +value = DummyMetric(), dtype = paddle.int32, shape = [1] + + @no_grad + def create_tensor(value, dtype, shape): + out = _varbase_creator(dtype=dtype) +> out = _C_ops.fill_constant(out, 'dtype', dtype, 'shape', shape, 'value', + value, 'force_cpu', False) +E ValueError: (InvalidArgument) fill_constant(): argument (position 6) must be float, but got DummyMetric (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:189) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:84: ValueError +____________________ test_metrics_rpow[2-expected_result1] _____________________ + +first_operand = 2 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 4) + + @pytest.mark.parametrize( + "first_operand,expected_result", + [ + pytest.param(DummyMetric(2), tensor(4)), + pytest.param(2, tensor(4)), + pytest.param(2.0, tensor(4.0), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_6)), + ], + ) + def test_metrics_rpow(first_operand, expected_result): + second_operand = DummyMetric(2) + + final_rpow = first_operand ** second_operand + + assert isinstance(final_rpow, CompositionalMetric) + final_rpow.update() +> assert B.allclose(expected_result, final_rpow.compute()) + +test_composition.py:395: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + 
self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) +../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:168: in pow + return _elementwise_op_in_dygraph( +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/decorator.py:232: in fun + return caller(func, *(extras + args), **kw) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/wrapped_decorator.py:25: in __impl__ + return wrapped_func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/framework.py:434: in __impl__ + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = 2 +y = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +axis = -1, act = None, use_mkldnn = False, op_name = 'elementwise_pow' + + @dygraph_only + def _elementwise_op_in_dygraph(x, + y, + axis=-1, + act=None, + use_mkldnn=False, + op_name=None): + def is_inplace(op_name): + return op_name[-1] == "_" + + if op_name not in OP_NAMEMAPPING.keys(): + op = getattr(_C_ops, op_name) + out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) + else: + if in_dygraph_mode(): + op = getattr(_C_ops, OP_NAMEMAPPING[op_name] if not is_inplace(op_name) else op_name) + out = op(x, y) + + if _in_legacy_dygraph(): + op = getattr(_C_ops, op_name) +> out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) +E ValueError: (InvalidArgument) elementwise_pow(): argument 'X' (position 0) must be Tensor, but got int (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:737) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:218: ValueError +___________________ test_metrics_rpow[2.0-expected_result2] ____________________ + +first_operand = 2.0 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 4.) 
+ + @pytest.mark.parametrize( + "first_operand,expected_result", + [ + pytest.param(DummyMetric(2), tensor(4)), + pytest.param(2, tensor(4)), + pytest.param(2.0, tensor(4.0), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_6)), + ], + ) + def test_metrics_rpow(first_operand, expected_result): + second_operand = DummyMetric(2) + + final_rpow = first_operand ** second_operand + + assert isinstance(final_rpow, CompositionalMetric) + final_rpow.update() +> assert B.allclose(expected_result, final_rpow.compute()) + +test_composition.py:395: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:378: in wrapped_func + self._computed = compute(*args, **kwargs) +../../paddlemetrics/metric.py:756: in compute + return self.op(val_a, val_b) +../../../../../../torch2paddle/paddleext/torchapi/core.py:91: in paddle_func + return func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:168: in pow + return _elementwise_op_in_dygraph( +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/decorator.py:232: in fun + return caller(func, *(extras + args), **kw) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/wrapped_decorator.py:25: in __impl__ + return wrapped_func(*args, **kwargs) +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/framework.py:434: in __impl__ + return func(*args, **kwargs) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = 2.0 +y = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +axis = -1, act = None, use_mkldnn = False, op_name = 'elementwise_pow' + + @dygraph_only + def _elementwise_op_in_dygraph(x, + y, + axis=-1, + act=None, + use_mkldnn=False, + op_name=None): + def is_inplace(op_name): + return op_name[-1] == "_" + + if op_name not in OP_NAMEMAPPING.keys(): + op = getattr(_C_ops, op_name) + out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) + else: + if in_dygraph_mode(): + op = getattr(_C_ops, OP_NAMEMAPPING[op_name] if not is_inplace(op_name) else op_name) + out = op(x, y) + + if _in_legacy_dygraph(): + op = getattr(_C_ops, op_name) +> out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) +E ValueError: (InvalidArgument) elementwise_pow(): argument 'X' (position 0) must be Tensor, but got float (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:737) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/math.py:218: ValueError +______________ test_metrics_rsub[first_operand0-expected_result0] ______________ + +first_operand = DummyMetric() +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(3), tensor(1)), + (3, tensor(1)), + (3.0, tensor(1.0)), + pytest.param(tensor(3), tensor(1), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], + ) + def test_metrics_rsub(first_operand, expected_result): + second_operand = DummyMetric(2) + +> final_rsub = first_operand - second_operand + +test_composition.py:410: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:666: in __sub__ + return CompositionalMetric(B.sub, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('sub',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = 
object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'sub' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +____________________ test_metrics_rsub[3-expected_result1] _____________________ + +first_operand = 3 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(3), tensor(1)), + (3, tensor(1)), + (3.0, tensor(1.0)), + pytest.param(tensor(3), tensor(1), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], + ) + def test_metrics_rsub(first_operand, expected_result): + second_operand = DummyMetric(2) + +> final_rsub = first_operand - second_operand + +test_composition.py:410: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:657: in __rsub__ + return CompositionalMetric(B.sub, other, self) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('sub',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'sub' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +___________________ test_metrics_rsub[3.0-expected_result2] ____________________ + +first_operand = 3.0 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 1.) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(3), tensor(1)), + (3, tensor(1)), + (3.0, tensor(1.0)), + pytest.param(tensor(3), tensor(1), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], + ) + def test_metrics_rsub(first_operand, expected_result): + second_operand = DummyMetric(2) + +> final_rsub = first_operand - second_operand + +test_composition.py:410: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:657: in __rsub__ + return CompositionalMetric(B.sub, other, self) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('sub',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'sub' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +______________ test_metrics_rsub[first_operand3-expected_result3] ______________ + +first_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 3) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(3), tensor(1)), + (3, tensor(1)), + (3.0, tensor(1.0)), + pytest.param(tensor(3), tensor(1), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], + ) + def test_metrics_rsub(first_operand, expected_result): + second_operand = DummyMetric(2) + + final_rsub = first_operand - second_operand + + assert isinstance(final_rsub, CompositionalMetric) + final_rsub.update() +> assert B.allclose(expected_result, final_rsub.compute()) +E assert Tensor(shape=[1], dtype=bool, place=Place(cpu), stop_gradient=True,\n [False]) +E + where Tensor(shape=[1], dtype=bool, place=Place(cpu), stop_gradient=True,\n [False]) = 
(Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True,\n 1), 5) +E + where = B.allclose +E + and 5 = () +E + where = CompositionalMetric(\n add(\n 3,\n DummyMetric()\n )\n).compute + +test_composition.py:414: AssertionError +____________ test_metrics_rtruediv[first_operand0-expected_result0] ____________ + +first_operand = DummyMetric() +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(6), tensor(2.0)), + (6, tensor(2.0)), + (6.0, tensor(2.0)), + (tensor(6), tensor(2.0)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_rtruediv(first_operand, expected_result): + second_operand = DummyMetric(3) + +> final_rtruediv = first_operand / second_operand + +test_composition.py:430: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:669: in __truediv__ + return CompositionalMetric(B.true_divide, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +args = ('true_divide',), kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'true_divide' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +__________________ test_metrics_rtruediv[6-expected_result1] ___________________ + +first_operand = 6 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(6), tensor(2.0)), + (6, tensor(2.0)), + (6.0, tensor(2.0)), + (tensor(6), tensor(2.0)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_rtruediv(first_operand, expected_result): + second_operand = DummyMetric(3) + +> final_rtruediv = first_operand / second_operand + +test_composition.py:430: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:660: in __rtruediv__ + return CompositionalMetric(B.true_divide, other, self) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +args = ('true_divide',), kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'true_divide' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +_________________ test_metrics_rtruediv[6.0-expected_result2] __________________ + +first_operand = 6.0 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) 
+ + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(6), tensor(2.0)), + (6, tensor(2.0)), + (6.0, tensor(2.0)), + (tensor(6), tensor(2.0)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_rtruediv(first_operand, expected_result): + second_operand = DummyMetric(3) + +> final_rtruediv = first_operand / second_operand + +test_composition.py:430: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:660: in __rtruediv__ + return CompositionalMetric(B.true_divide, other, self) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +args = ('true_divide',), kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'true_divide' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +____________ test_metrics_rtruediv[first_operand3-expected_result3] ____________ + +first_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 6) +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) + + @pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(6), tensor(2.0)), + (6, tensor(2.0)), + (6.0, tensor(2.0)), + (tensor(6), tensor(2.0)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_rtruediv(first_operand, expected_result): + second_operand = DummyMetric(3) + + final_rtruediv = first_operand / second_operand + + assert isinstance(final_rtruediv, CompositionalMetric) + final_rtruediv.update() +> assert B.allclose(expected_result, final_rtruediv.compute()) +E assert Tensor(shape=[1], dtype=bool, place=Place(cpu), stop_gradient=True,\n [False]) +E + where Tensor(shape=[1], dtype=bool, place=Place(cpu), stop_gradient=True,\n [False]) = (Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,\n 2.), 9) +E + where = B.allclose +E + and 9 = () +E + where = CompositionalMetric(\n add(\n 6,\n DummyMetric()\n )\n).compute + +test_composition.py:434: AssertionError +______________ test_metrics_sub[second_operand0-expected_result0] ______________ + +second_operand = DummyMetric() +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1.0)), + (tensor(2), tensor(1)), + ], + ) + def test_metrics_sub(second_operand, expected_result): + first_metric = DummyMetric(3) + +> final_sub = first_metric - second_operand + +test_composition.py:449: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:666: in __sub__ + return CompositionalMetric(B.sub, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('sub',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'sub' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +_____________________ test_metrics_sub[2-expected_result1] _____________________ + +second_operand = 2 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), 
stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1.0)), + (tensor(2), tensor(1)), + ], + ) + def test_metrics_sub(second_operand, expected_result): + first_metric = DummyMetric(3) + +> final_sub = first_metric - second_operand + +test_composition.py:449: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:666: in __sub__ + return CompositionalMetric(B.sub, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('sub',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'sub' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +____________________ test_metrics_sub[2.0-expected_result2] ____________________ + +second_operand = 2.0 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 1.) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1.0)), + (tensor(2), tensor(1)), + ], + ) + def test_metrics_sub(second_operand, expected_result): + first_metric = DummyMetric(3) + +> final_sub = first_metric - second_operand + +test_composition.py:449: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:666: in __sub__ + return CompositionalMetric(B.sub, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('sub',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'sub' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +______________ test_metrics_sub[second_operand3-expected_result3] ______________ + +second_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 1) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1.0)), + (tensor(2), tensor(1)), + ], + ) + def test_metrics_sub(second_operand, expected_result): + first_metric = DummyMetric(3) + +> final_sub = first_metric - second_operand + +test_composition.py:449: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:666: in __sub__ + return CompositionalMetric(B.sub, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = , args = ('sub',) +kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'sub' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +____________ test_metrics_truediv[second_operand0-expected_result0] ____________ + +second_operand = DummyMetric() +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) 
+ + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(3), tensor(2.0)), + (3, tensor(2.0)), + (3.0, tensor(2.0)), + (tensor(3), tensor(2.0)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_truediv(second_operand, expected_result): + first_metric = DummyMetric(6) + +> final_truediv = first_metric / second_operand + +test_composition.py:469: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:669: in __truediv__ + return CompositionalMetric(B.true_divide, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +args = ('true_divide',), kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'true_divide' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +___________________ test_metrics_truediv[3-expected_result1] ___________________ + +second_operand = 3 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(3), tensor(2.0)), + (3, tensor(2.0)), + (3.0, tensor(2.0)), + (tensor(3), tensor(2.0)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_truediv(second_operand, expected_result): + first_metric = DummyMetric(6) + +> final_truediv = first_metric / second_operand + +test_composition.py:469: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:669: in __truediv__ + return CompositionalMetric(B.true_divide, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +args = ('true_divide',), kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'true_divide' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +__________________ test_metrics_truediv[3.0-expected_result2] __________________ + +second_operand = 3.0 +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) 
+ + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(3), tensor(2.0)), + (3, tensor(2.0)), + (3.0, tensor(2.0)), + (tensor(3), tensor(2.0)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_truediv(second_operand, expected_result): + first_metric = DummyMetric(6) + +> final_truediv = first_metric / second_operand + +test_composition.py:469: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:669: in __truediv__ + return CompositionalMetric(B.true_divide, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +args = ('true_divide',), kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'true_divide' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +____________ test_metrics_truediv[second_operand3-expected_result3] ____________ + +second_operand = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 3) +expected_result = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(3), tensor(2.0)), + (3, tensor(2.0)), + (3.0, tensor(2.0)), + (tensor(3), tensor(2.0)), + ], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_truediv(second_operand, expected_result): + first_metric = DummyMetric(6) + +> final_truediv = first_metric / second_operand + +test_composition.py:469: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../paddlemetrics/metric.py:669: in __truediv__ + return CompositionalMetric(B.true_divide, self, other) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +args = ('true_divide',), kwargs = {} + + def __getattribute__(self, *args, **kwargs): + # Perform custom logic here + +> obj = object.__getattribute__(this_module, *args, **kwargs) +E AttributeError: 'module' object has no attribute 'true_divide' + +../../../../../../torch2paddle/paddleext/torchapi/__init__.py:20: AttributeError +______________ test_metrics_xor[second_operand1-expected_result1] ______________ + +second_operand = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [1, 0, 3]) +expected_result = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [-2, -2, 0]) + + @pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric([1, 0, 3]), tensor([-2, -2, 0])), (tensor([1, 0, 3]), tensor([-2, -2, 0]))], + ) + @pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) + def test_metrics_xor(second_operand, expected_result): + first_metric = DummyMetric([-1, -2, 3]) + + final_xor = first_metric ^ second_operand +> final_rxor = second_operand ^ first_metric + +test_composition.py:485: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/logic.py:616: in bitwise_xor + return _bitwise_op( +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +op_name = 'bitwise_xor' +x = Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True, + [1, 0, 3]) +y = DummyMetric(), out = None, name = None, binary_op = True + + def _bitwise_op(op_name, x, y, out=None, name=None, 
binary_op=True): + if paddle.in_dynamic_mode(): + op = getattr(_C_ops, op_name) + if binary_op: +> return op(x, y) +E ValueError: (InvalidArgument) bitwise_xor(): argument 'Y' (position 1) must be Tensor, but got DummyMetric (at /Users/paddle/work/pengyuqi/Paddle/paddle/fluid/pybind/op_function_common.cc:737) + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/logic.py:505: ValueError +_______________ test_metrics_getitem[value0-1-expected_result0] ________________ + +value = [1, 2, 3], idx = 1 +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) + + @pytest.mark.parametrize( + ["value", "idx", "expected_result"], + [([1, 2, 3], 1, tensor(2)), ([[0, 1], [2, 3]], (1, 0), tensor(2)), ([[0, 1], [2, 3]], 1, tensor([2, 3]))], + ) + def test_metrics_getitem(value, idx, expected_result): + first_metric = DummyMetric(value) + + final_getitem = first_metric[idx] + assert isinstance(final_getitem, CompositionalMetric) + final_getitem.update() +> assert B.allclose(expected_result, final_getitem.compute()) + +test_composition.py:543: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../../../../../torch2paddle/paddleext/torchapi/functional.py:308: in allclose + return paddle.allclose(input.float(), other.float(), rtol=rtol, atol=atol, equal_nan=equal_nan, name=name) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) +y = Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + [2.]) +rtol = 1e-05, atol = 1e-08, equal_nan = False, name = None + + @templatedoc() + def allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name=None): + """ + ${comment} + + Args: + x(Tensor): ${input_comment}. + y(Tensor): ${other_comment}. + rtol(rtoltype, optional): The relative tolerance. Default: :math:`1e-5` . + atol(atoltype, optional): The absolute tolerance. Default: :math:`1e-8` . + equal_nan(equalnantype, optional): ${equal_nan_comment}. + name (str, optional): Name for the operation. For more information, please + refer to :ref:`api_guide_Name`. Default: None. + + Returns: + Tensor: ${out_comment}. + + Raises: + TypeError: The data type of ``x`` must be one of float32, float64. + TypeError: The data type of ``y`` must be one of float32, float64. + TypeError: The type of ``rtol`` must be float. + TypeError: The type of ``atol`` must be float. + TypeError: The type of ``equal_nan`` must be bool. + + Examples: + .. code-block:: python + + import paddle + + x = paddle.to_tensor([10000., 1e-07]) + y = paddle.to_tensor([10000.1, 1e-08]) + result1 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, + equal_nan=False, name="ignore_nan") + np_result1 = result1.numpy() + # [False] + result2 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, + equal_nan=True, name="equal_nan") + np_result2 = result2.numpy() + # [False] + + x = paddle.to_tensor([1.0, float('nan')]) + y = paddle.to_tensor([1.0, float('nan')]) + result1 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, + equal_nan=False, name="ignore_nan") + np_result1 = result1.numpy() + # [False] + result2 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, + equal_nan=True, name="equal_nan") + np_result2 = result2.numpy() + # [True] + """ + + if in_dygraph_mode(): + # NOTE(dev): Pass tol as Tensor to fix precision loss problem, because + # C++ backend will cast it into float32 if passing float from python. 
+ as_tensor = lambda x: paddle.to_tensor([x], dtype='float64', place='cpu') + return _C_ops.final_state_allclose(x, y, + as_tensor(rtol), + as_tensor(atol), equal_nan) + if _in_legacy_dygraph(): +> return _C_ops.allclose(x, y, 'rtol', + str(rtol), 'atol', + str(atol), 'equal_nan', equal_nan) +E RuntimeError: (PreconditionNotMet) Input(Input) and Input(Other) must have the same dimension size. +E [Hint: Expected input_dim.size() == other_dim.size(), but received input_dim.size():0 != other_dim.size():1.] (at /Users/paddle/work/pengyuqi/Paddle/paddle/phi/infermeta/binary.cc:39) +E [operator < allclose > error] + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/logic.py:140: RuntimeError +______________ test_metrics_getitem[value1-idx1-expected_result1] ______________ + +value = [[0, 1], [2, 3]], idx = (1, 0) +expected_result = Tensor(shape=[], dtype=int32, place=Place(cpu), stop_gradient=True, + 2) + + @pytest.mark.parametrize( + ["value", "idx", "expected_result"], + [([1, 2, 3], 1, tensor(2)), ([[0, 1], [2, 3]], (1, 0), tensor(2)), ([[0, 1], [2, 3]], 1, tensor([2, 3]))], + ) + def test_metrics_getitem(value, idx, expected_result): + first_metric = DummyMetric(value) + + final_getitem = first_metric[idx] + assert isinstance(final_getitem, CompositionalMetric) + final_getitem.update() +> assert B.allclose(expected_result, final_getitem.compute()) + +test_composition.py:543: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../../../../../torch2paddle/paddleext/torchapi/functional.py:308: in allclose + return paddle.allclose(input.float(), other.float(), rtol=rtol, atol=atol, equal_nan=equal_nan, name=name) +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +x = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) +y = Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + [2.]) +rtol = 1e-05, atol = 1e-08, equal_nan = False, name = None + + @templatedoc() + def allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name=None): + """ + ${comment} + + Args: + x(Tensor): ${input_comment}. + y(Tensor): ${other_comment}. + rtol(rtoltype, optional): The relative tolerance. Default: :math:`1e-5` . + atol(atoltype, optional): The absolute tolerance. Default: :math:`1e-8` . + equal_nan(equalnantype, optional): ${equal_nan_comment}. + name (str, optional): Name for the operation. For more information, please + refer to :ref:`api_guide_Name`. Default: None. + + Returns: + Tensor: ${out_comment}. + + Raises: + TypeError: The data type of ``x`` must be one of float32, float64. + TypeError: The data type of ``y`` must be one of float32, float64. + TypeError: The type of ``rtol`` must be float. + TypeError: The type of ``atol`` must be float. + TypeError: The type of ``equal_nan`` must be bool. + + Examples: + .. 
code-block:: python + + import paddle + + x = paddle.to_tensor([10000., 1e-07]) + y = paddle.to_tensor([10000.1, 1e-08]) + result1 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, + equal_nan=False, name="ignore_nan") + np_result1 = result1.numpy() + # [False] + result2 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, + equal_nan=True, name="equal_nan") + np_result2 = result2.numpy() + # [False] + + x = paddle.to_tensor([1.0, float('nan')]) + y = paddle.to_tensor([1.0, float('nan')]) + result1 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, + equal_nan=False, name="ignore_nan") + np_result1 = result1.numpy() + # [False] + result2 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, + equal_nan=True, name="equal_nan") + np_result2 = result2.numpy() + # [True] + """ + + if in_dygraph_mode(): + # NOTE(dev): Pass tol as Tensor to fix precision loss problem, because + # C++ backend will cast it into float32 if passing float from python. + as_tensor = lambda x: paddle.to_tensor([x], dtype='float64', place='cpu') + return _C_ops.final_state_allclose(x, y, + as_tensor(rtol), + as_tensor(atol), equal_nan) + if _in_legacy_dygraph(): +> return _C_ops.allclose(x, y, 'rtol', + str(rtol), 'atol', + str(atol), 'equal_nan', equal_nan) +E RuntimeError: (PreconditionNotMet) Input(Input) and Input(Other) must have the same dimension size. +E [Hint: Expected input_dim.size() == other_dim.size(), but received input_dim.size():0 != other_dim.size():1.] (at /Users/paddle/work/pengyuqi/Paddle/paddle/phi/infermeta/binary.cc:39) +E [operator < allclose > error] + +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/logic.py:140: RuntimeError +=============================== warnings summary =============================== +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:19 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:19: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + DESCRIPTOR = _descriptor.FileDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:33 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:33: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:37 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:37: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:41 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:41: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. 
+ _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:45 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:45: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:49 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:49: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:53 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:53: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:57 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:57: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:61 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:61: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:65 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:65: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:69 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:69: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:73 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:73: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. 
Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:77 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:77: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:81 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:81: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:27 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:27: DeprecationWarning: Call to deprecated create function EnumDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _ATTRTYPE = _descriptor.EnumDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:115 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:115: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:119 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:119: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:123 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:123: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:127 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:127: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. 
+ _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:131 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:131: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:135 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:135: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:139 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:139: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:143 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:143: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:147 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:147: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:151 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:151: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:155 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:155: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:159 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:159: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. 
Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:163 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:163: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:167 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:167: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:171 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:171: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:175 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:175: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:179 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:179: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:183 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:183: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:187 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:187: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. 
+ _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:191 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:191: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:195 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:195: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:199 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:199: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:203 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:203: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:207 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:207: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:211 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:211: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:215 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:215: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:219 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:219: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. 
Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:223 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:223: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:227 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:227: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.EnumValueDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:109 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:109: DeprecationWarning: Call to deprecated create function EnumDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _VARTYPE_TYPE = _descriptor.EnumDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:247 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:247: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:240 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:240: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _VERSION = _descriptor.Descriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:278 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:278: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:285 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:285: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:292 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:292: DeprecationWarning: Call to deprecated create function FieldDescriptor(). 
Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:299 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:299: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:271 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:271: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _PROCESSMESHDESC = _descriptor.Descriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:330 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:330: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:337 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:337: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:344 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:344: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:351 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:351: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:358 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:358: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. 
+ _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:365 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:365: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:372 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:372: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:379 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:379: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:386 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:386: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:393 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:393: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:400 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:400: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:407 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:407: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:414 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:414: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. 
Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:421 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:421: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:428 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:428: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:323 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:323: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _OPDESC_ATTR = _descriptor.Descriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:458 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:458: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:465 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:465: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:451 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:451: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _OPDESC_VAR = _descriptor.Descriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:495 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:495: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:502 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:502: DeprecationWarning: Call to deprecated create function FieldDescriptor(). 
Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:509 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:509: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:516 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:516: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:523 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:523: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:488 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:488: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _OPDESC = _descriptor.Descriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:554 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:554: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:561 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:561: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:568 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:568: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. 
+ _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:575 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:575: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:582 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:582: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:589 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:589: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:596 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:596: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:547 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:547: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _OPPROTO_VAR = _descriptor.Descriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:626 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:626: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:633 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:633: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:640 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:640: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. 
Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:647 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:647: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:654 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:654: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:661 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:661: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:619 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:619: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _OPPROTO_ATTR = _descriptor.Descriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:691 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:691: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:698 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:698: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:705 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:705: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:712 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:712: DeprecationWarning: Call to deprecated create function FieldDescriptor(). 
Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:719 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:719: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:684 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:684: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _OPPROTO = _descriptor.Descriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:750 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:750: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:757 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:757: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool. + _descriptor.FieldDescriptor( + +../../../../../../../.envs/oix/lib/python3.8/site-packages/comet_ml/monkey_patching.py:19 + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/comet_ml/monkey_patching.py:19: DeprecationWarning: the imp module is deprecated in favour of importlib; see the module's documentation for alternative uses + import imp + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/framework.py:1104 +test_composition.py::test_metrics_and[second_operand3-expected_result3] +test_composition.py::test_metrics_or[second_operand1-expected_result1] + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/framework.py:1104: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here. + Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations + elif dtype == np.bool: + +../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/tensor/creation.py:125: 1 warning +test_composition.py: 10 warnings + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/creation.py:125: DeprecationWarning: `np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself. Doing this will not modify any behavior and is safe. 
+ Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations + if data.dtype == np.object: + +test_composition.py::test_metrics_eq[2.0-expected_result2] +test_composition.py::test_metrics_ge[2.0-expected_result2] +test_composition.py::test_metrics_gt[2.0-expected_result2] +test_composition.py::test_metrics_le[2.0-expected_result2] +test_composition.py::test_metrics_lt[2.0-expected_result2] + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:276: UserWarning: The dtype of left and right variables are not the same, left dtype is paddle.int32, but right dtype is paddle.float32, the right dtype will convert to paddle.int32 + warnings.warn( + +-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html +=========================== short test summary info ============================ +FAILED test_composition.py::test_metrics_and[3-expected_result1] - ValueError... +FAILED test_composition.py::test_metrics_and[3-expected_result2] - ValueError... +FAILED test_composition.py::test_metrics_and[second_operand3-expected_result3] +FAILED test_composition.py::test_metrics_floordiv[2-expected_result1] - Value... +FAILED test_composition.py::test_metrics_floordiv[2.0-expected_result2] - Val... +FAILED test_composition.py::test_metrics_matmul[second_operand0-expected_result0] +FAILED test_composition.py::test_metrics_matmul[second_operand1-expected_result1] +FAILED test_composition.py::test_metrics_mod[second_operand0-expected_result0] +FAILED test_composition.py::test_metrics_mod[2-expected_result1] - AttributeE... +FAILED test_composition.py::test_metrics_mod[2.0-expected_result2] - Attribut... +FAILED test_composition.py::test_metrics_mod[second_operand3-expected_result3] +FAILED test_composition.py::test_metrics_mul[second_operand0-expected_result0] +FAILED test_composition.py::test_metrics_mul[2-expected_result1] - AttributeE... +FAILED test_composition.py::test_metrics_mul[2.0-expected_result2] - Attribut... +FAILED test_composition.py::test_metrics_mul[second_operand3-expected_result3] +FAILED test_composition.py::test_metrics_ne[second_operand0-expected_result0] +FAILED test_composition.py::test_metrics_ne[2-expected_result1] - AttributeEr... +FAILED test_composition.py::test_metrics_ne[2.0-expected_result2] - Attribute... +FAILED test_composition.py::test_metrics_ne[second_operand3-expected_result3] +FAILED test_composition.py::test_metrics_or[second_operand1-expected_result1] +FAILED test_composition.py::test_metrics_rfloordiv[5-expected_result0] - Valu... +FAILED test_composition.py::test_metrics_rfloordiv[5.0-expected_result1] - Va... +FAILED test_composition.py::test_metrics_rfloordiv[first_operand2-expected_result2] +FAILED test_composition.py::test_metrics_rmatmul[first_operand0-expected_result0] +FAILED test_composition.py::test_metrics_rmod[first_operand0-expected_result0] +FAILED test_composition.py::test_metrics_rpow[2-expected_result1] - ValueErro... +FAILED test_composition.py::test_metrics_rpow[2.0-expected_result2] - ValueEr... +FAILED test_composition.py::test_metrics_rsub[first_operand0-expected_result0] +FAILED test_composition.py::test_metrics_rsub[3-expected_result1] - Attribute... +FAILED test_composition.py::test_metrics_rsub[3.0-expected_result2] - Attribu... 
+FAILED test_composition.py::test_metrics_rsub[first_operand3-expected_result3] +FAILED test_composition.py::test_metrics_rtruediv[first_operand0-expected_result0] +FAILED test_composition.py::test_metrics_rtruediv[6-expected_result1] - Attri... +FAILED test_composition.py::test_metrics_rtruediv[6.0-expected_result2] - Att... +FAILED test_composition.py::test_metrics_rtruediv[first_operand3-expected_result3] +FAILED test_composition.py::test_metrics_sub[second_operand0-expected_result0] +FAILED test_composition.py::test_metrics_sub[2-expected_result1] - AttributeE... +FAILED test_composition.py::test_metrics_sub[2.0-expected_result2] - Attribut... +FAILED test_composition.py::test_metrics_sub[second_operand3-expected_result3] +FAILED test_composition.py::test_metrics_truediv[second_operand0-expected_result0] +FAILED test_composition.py::test_metrics_truediv[3-expected_result1] - Attrib... +FAILED test_composition.py::test_metrics_truediv[3.0-expected_result2] - Attr... +FAILED test_composition.py::test_metrics_truediv[second_operand3-expected_result3] +FAILED test_composition.py::test_metrics_xor[second_operand1-expected_result1] +FAILED test_composition.py::test_metrics_getitem[value0-1-expected_result0] +FAILED test_composition.py::test_metrics_getitem[value1-idx1-expected_result1] +================= 46 failed, 40 passed, 120 warnings in 2.37s ================== diff --git a/RE/paddlemetric/src/tests/bases/test_aggregation.py b/RE/paddlemetric/src/tests/bases/test_aggregation.py new file mode 100644 index 00000000..559ddad0 --- /dev/null +++ b/RE/paddlemetric/src/tests/bases/test_aggregation.py @@ -0,0 +1,166 @@ +import numpy as np +import pytest +import paddleext.torchapi as B + +from ..helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.aggregation import CatMetric, MaxMetric, MeanMetric, MinMetric, SumMetric + + +def compare_mean(values, weights): + """reference implementation for mean aggregation.""" + return np.average(values.numpy(), weights=weights) + + +def compare_sum(values, weights): + """reference implementation for sum aggregation.""" + return np.sum(values.numpy()) + + +def compare_min(values, weights): + """reference implementation for min aggregation.""" + return np.min(values.numpy()) + + +def compare_max(values, weights): + """reference implementation for max aggregation.""" + return np.max(values.numpy()) + + +# wrap all other than mean metric to take an additional argument +# this lets them fit into the testing framework +class WrappedMinMetric(MinMetric): + """Wrapped min metric.""" + + def update(self, values, weights): + """only pass values on.""" + super().update(values) + + +class WrappedMaxMetric(MaxMetric): + """Wrapped max metric.""" + + def update(self, values, weights): + """only pass values on.""" + super().update(values) + + +class WrappedSumMetric(SumMetric): + """Wrapped min metric.""" + + def update(self, values, weights): + """only pass values on.""" + super().update(values) + + +class WrappedCatMetric(CatMetric): + """Wrapped cat metric.""" + + def update(self, values, weights): + """only pass values on.""" + super().update(values) + + +@pytest.mark.parametrize( + "values, weights", + [ + (B.rand(NUM_BATCHES, BATCH_SIZE), B.ones(NUM_BATCHES, BATCH_SIZE)), + (B.rand(NUM_BATCHES, BATCH_SIZE), B.rand(NUM_BATCHES, BATCH_SIZE) > 0.5), + (B.rand(NUM_BATCHES, BATCH_SIZE, 2), B.rand(NUM_BATCHES, BATCH_SIZE, 2) > 0.5), + ], +) +@pytest.mark.parametrize( + "metric_class, compare_fn", + [ + (WrappedMinMetric, compare_min), + 
(WrappedMaxMetric, compare_max),
+        (WrappedSumMetric, compare_sum),
+        (MeanMetric, compare_mean),
+    ],
+)
+class TestAggregation(MetricTester):
+    """Test aggregation metrics."""
+
+    @pytest.mark.parametrize("ddp", [False])
+    @pytest.mark.parametrize("dist_sync_on_step", [False])
+    def test_aggregation(self, ddp, dist_sync_on_step, metric_class, compare_fn, values, weights):
+        """test modular implementation."""
+        self.run_class_metric_test(
+            ddp=ddp,
+            dist_sync_on_step=dist_sync_on_step,
+            metric_class=metric_class,
+            sk_metric=compare_fn,
+            check_scriptable=True,
+            # abuse of names here: values/weights are passed as preds/target
+            preds=values,
+            target=weights,
+        )
+
+
+_case1 = float("nan") * B.ones(5)
+_case2 = B.tensor([1.0, 2.0, float("nan"), 4.0, 5.0])
+
+
+@pytest.mark.parametrize("value", [_case1, _case2])
+@pytest.mark.parametrize("nan_strategy", ["error", "warn"])
+@pytest.mark.parametrize("metric_class", [MinMetric, MaxMetric, SumMetric, MeanMetric, CatMetric])
+def test_nan_error(value, nan_strategy, metric_class):
+    """test correct errors are raised."""
+    metric = metric_class(nan_strategy=nan_strategy)
+    if nan_strategy == "error":
+        with pytest.raises(RuntimeError, match="Encounted `nan` values in tensor"):
+            metric(value.clone())
+    elif nan_strategy == "warn":
+        with pytest.warns(UserWarning, match="Encounted `nan` values in tensor"):
+            metric(value.clone())
+
+
+@pytest.mark.parametrize(
+    "metric_class, nan_strategy, value, expected",
+    [
+        (MinMetric, "ignore", _case1, B.tensor(float("inf"))),
+        (MinMetric, 2.0, _case1, 2.0),
+        (MinMetric, "ignore", _case2, 1.0),
+        (MinMetric, 2.0, _case2, 1.0),
+        (MaxMetric, "ignore", _case1, -B.tensor(float("inf"))),
+        (MaxMetric, 2.0, _case1, 2.0),
+        (MaxMetric, "ignore", _case2, 5.0),
+        (MaxMetric, 2.0, _case2, 5.0),
+        (SumMetric, "ignore", _case1, 0.0),
+        (SumMetric, 2.0, _case1, 10.0),
+        (SumMetric, "ignore", _case2, 12.0),
+        (SumMetric, 2.0, _case2, 14.0),
+        (MeanMetric, "ignore", _case1, 0.0),
+        (MeanMetric, 2.0, _case1, 2.0),
+        (MeanMetric, "ignore", _case2, 3.0),
+        (MeanMetric, 2.0, _case2, 2.8),
+        (CatMetric, "ignore", _case1, []),
+        (CatMetric, 2.0, _case1, B.tensor([2.0, 2.0, 2.0, 2.0, 2.0])),
+        (CatMetric, "ignore", _case2, B.tensor([1.0, 2.0, 4.0, 5.0])),
+        (CatMetric, 2.0, _case2, B.tensor([1.0, 2.0, 2.0, 4.0, 5.0])),
+    ],
+)
+def test_nan_expected(metric_class, nan_strategy, value, expected):
+    """test that nan values are handled correctly."""
+    metric = metric_class(nan_strategy=nan_strategy)
+    metric.update(value.clone())
+    out = metric.compute()
+    assert np.allclose(out, expected, equal_nan=True)
+
+
+@pytest.mark.parametrize("metric_class", [MinMetric, MaxMetric, SumMetric, MeanMetric, CatMetric])
+def test_error_on_wrong_nan_strategy(metric_class):
+    """test error raised on wrong nan_strategy argument."""
+    with pytest.raises(ValueError, match="Arg `nan_strategy` should either .*"):
+        metric_class(nan_strategy=[])
+
+
+# @pytest.mark.skipif(not hasattr(torch, "broadcast_to"), reason="PyTorch <1.8 does not have broadcast_to")
+@pytest.mark.parametrize(
+    "weights, expected", [(1, 11.5), (B.ones(2, 1, 1), 11.5), (B.tensor([1, 2]).reshape(2, 1, 1), 13.5)]
+)
+def test_mean_metric_broadcasting(weights, expected):
+    """check that weight broadcasting works for mean metric."""
+    values = B.arange(24).reshape(2, 3, 4)
+    avg = MeanMetric()
+
+    assert avg(values, weights) == expected
diff --git a/RE/paddlemetric/src/tests/bases/test_collections.py b/RE/paddlemetric/src/tests/bases/test_collections.py
new file mode 100644
index 00000000..d92234f8
--- /dev/null
+++
b/RE/paddlemetric/src/tests/bases/test_collections.py @@ -0,0 +1,251 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pickle + +import pytest +import paddleext.torchapi as B + +from ..helpers import seed_all +from ..helpers.testers import DummyMetricDiff, DummyMetricSum +from paddlemetrics.collections import MetricCollection + +seed_all(42) + + +def test_metric_collection(tmpdir): + m1 = DummyMetricSum() + m2 = DummyMetricDiff() + + metric_collection = MetricCollection([m1, m2]) + + # Test correct dict structure + assert len(metric_collection) == 2 + assert metric_collection["DummyMetricSum"] == m1 + assert metric_collection["DummyMetricDiff"] == m2 + + # Test correct initialization + for name, metric in metric_collection.items(): + assert metric.x == 0, f"Metric {name} not initialized correctly" + + # Test every metric gets updated + metric_collection.update(5) + for name, metric in metric_collection.items(): + assert metric.x.abs() == 5, f"Metric {name} not updated correctly" + + # Test compute on each metric + metric_collection.update(-5) + metric_vals = metric_collection.compute() + assert len(metric_vals) == 2 + for name, metric_val in metric_vals.items(): + assert metric_val == 0, f"Metric {name}.compute not called correctly" + + # Test that everything is reset + for name, metric in metric_collection.items(): + assert metric.x == 0, f"Metric {name} not reset correctly" + + # Test pickable + metric_pickled = pickle.dumps(metric_collection) + metric_loaded = pickle.loads(metric_pickled) + assert isinstance(metric_loaded, MetricCollection) + + +@pytest.mark.skipif(not B.cuda.is_available(), reason="Test requires GPU.") +def test_device_and_dtype_transfer_metriccollection(tmpdir): + m1 = DummyMetricSum() + m2 = DummyMetricDiff() + + metric_collection = MetricCollection([m1, m2]) + for _, metric in metric_collection.items(): + assert metric.x.is_cuda is False + assert metric.x.dtype == B.float32 + + metric_collection = metric_collection.to(device="cuda") + for _, metric in metric_collection.items(): + assert metric.x.is_cuda + + metric_collection = metric_collection.double() + for _, metric in metric_collection.items(): + assert metric.x.dtype == B.float64 + + metric_collection = metric_collection.half() + for _, metric in metric_collection.items(): + assert metric.x.dtype == B.float16 + + +def test_metric_collection_wrong_input(tmpdir): + """Check that errors are raised on wrong input.""" + dms = DummyMetricSum() + + # Not all input are metrics (list) + with pytest.raises(ValueError): + _ = MetricCollection([dms, 5]) + + # Not all input are metrics (dict) + with pytest.raises(ValueError): + _ = MetricCollection({"metric1": dms, "metric2": 5}) + + # Same metric passed in multiple times + with pytest.raises(ValueError, match="Encountered two metrics both named *."): + _ = MetricCollection([dms, dms]) + + # Not a list or dict passed in + with pytest.warns(Warning, match=" which are not `Metric` so they will be ignored."): + _ = 
MetricCollection(dms, [dms]) + + +def test_metric_collection_args_kwargs(tmpdir): + """Check that args and kwargs gets passed correctly in metric collection, Checks both update and forward + method.""" + m1 = DummyMetricSum() + m2 = DummyMetricDiff() + + metric_collection = MetricCollection([m1, m2]) + + # args gets passed to all metrics + metric_collection.update(5) + assert metric_collection["DummyMetricSum"].x == 5 + assert metric_collection["DummyMetricDiff"].x == -5 + metric_collection.reset() + _ = metric_collection(5) + assert metric_collection["DummyMetricSum"].x == 5 + assert metric_collection["DummyMetricDiff"].x == -5 + metric_collection.reset() + + # kwargs gets only passed to metrics that it matches + metric_collection.update(x=10, y=20) + assert metric_collection["DummyMetricSum"].x == 10 + assert metric_collection["DummyMetricDiff"].x == -20 + metric_collection.reset() + _ = metric_collection(x=10, y=20) + assert metric_collection["DummyMetricSum"].x == 10 + assert metric_collection["DummyMetricDiff"].x == -20 + + +@pytest.mark.parametrize( + "prefix, postfix", + [ + [None, None], + ["prefix_", None], + [None, "_postfix"], + ["prefix_", "_postfix"], + ], +) +def test_metric_collection_prefix_postfix_args(prefix, postfix): + """Test that the prefix arg alters the keywords in the output.""" + m1 = DummyMetricSum() + m2 = DummyMetricDiff() + names = ["DummyMetricSum", "DummyMetricDiff"] + names = [prefix + n if prefix is not None else n for n in names] + names = [n + postfix if postfix is not None else n for n in names] + + metric_collection = MetricCollection([m1, m2], prefix=prefix, postfix=postfix) + + # test forward + out = metric_collection(5) + for name in names: + assert name in out, "prefix or postfix argument not working as intended with forward method" + + # test compute + out = metric_collection.compute() + for name in names: + assert name in out, "prefix or postfix argument not working as intended with compute method" + + # test clone + new_metric_collection = metric_collection.clone(prefix="new_prefix_") + out = new_metric_collection(5) + names = [n[len(prefix) :] if prefix is not None else n for n in names] # strip away old prefix + for name in names: + assert f"new_prefix_{name}" in out, "prefix argument not working as intended with clone method" + + for k, _ in new_metric_collection.items(): + assert "new_prefix_" in k + + for k in new_metric_collection.keys(): + assert "new_prefix_" in k + + for k, _ in new_metric_collection.items(keep_base=True): + assert "new_prefix_" not in k + + for k in new_metric_collection.keys(keep_base=True): + assert "new_prefix_" not in k + + assert isinstance(new_metric_collection.keys(keep_base=True), type(new_metric_collection.keys(keep_base=False))) + assert isinstance(new_metric_collection.items(keep_base=True), type(new_metric_collection.items(keep_base=False))) + + new_metric_collection = new_metric_collection.clone(postfix="_new_postfix") + out = new_metric_collection(5) + names = [n[: -len(postfix)] if postfix is not None else n for n in names] # strip away old postfix + for name in names: + assert f"new_prefix_{name}_new_postfix" in out, "postfix argument not working as intended with clone method" + + +def test_metric_collection_repr(): + """Test MetricCollection.""" + + class A(DummyMetricSum): + pass + + class B(DummyMetricDiff): + pass + + m1 = A() + m2 = B() + metric_collection = MetricCollection([m1, m2], prefix=None, postfix=None) + + expected = "MetricCollection(\n (A): A()\n (B): B()\n)" + assert 
metric_collection.__repr__() == expected + + metric_collection = MetricCollection([m1, m2], prefix="a", postfix=None) + + expected = "MetricCollection(\n (A): A()\n (B): B(),\n prefix=a\n)" + assert metric_collection.__repr__() == expected + + metric_collection = MetricCollection([m1, m2], prefix=None, postfix="a") + expected = "MetricCollection(\n (A): A()\n (B): B(),\n postfix=a\n)" + assert metric_collection.__repr__() == expected + + metric_collection = MetricCollection([m1, m2], prefix="a", postfix="b") + expected = "MetricCollection(\n (A): A()\n (B): B(),\n prefix=a,\n postfix=b\n)" + assert metric_collection.__repr__() == expected + + +def test_metric_collection_same_order(): + m1 = DummyMetricSum() + m2 = DummyMetricDiff() + col1 = MetricCollection({"a": m1, "b": m2}) + col2 = MetricCollection({"b": m2, "a": m1}) + for k1, k2 in zip(col1.keys(), col2.keys()): + assert k1 == k2 + + +def test_collection_add_metrics(): + m1 = DummyMetricSum() + m2 = DummyMetricDiff() + + collection = MetricCollection([m1]) + collection.add_metrics({"m1_": DummyMetricSum()}) + collection.add_metrics(m2) + + collection.update(5) + results = collection.compute() + assert results["DummyMetricSum"] == results["m1_"] and results["m1_"] == 5 + assert results["DummyMetricDiff"] == -5 + + +def test_collection_check_arg(): + assert MetricCollection._check_arg(None, "prefix") is None + assert MetricCollection._check_arg("sample", "prefix") == "sample" + + with pytest.raises(ValueError, match="Expected input `postfix` to be a string, but got"): + MetricCollection._check_arg(1, "postfix") diff --git a/RE/paddlemetric/src/tests/bases/test_composition.py b/RE/paddlemetric/src/tests/bases/test_composition.py new file mode 100644 index 00000000..0c9e6a08 --- /dev/null +++ b/RE/paddlemetric/src/tests/bases/test_composition.py @@ -0,0 +1,559 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from operator import neg, pos + +import pytest +import paddleext.torchapi as B +from paddleext.torchapi import tensor + +from ..helpers import _MARK_TORCH_MIN_1_4, _MARK_TORCH_MIN_1_5, _MARK_TORCH_MIN_1_6 +from paddlemetrics.metric import CompositionalMetric, Metric + + +class DummyMetric(Metric): + def __init__(self, val_to_return): + super().__init__() + self._num_updates = 0 + self._val_to_return = val_to_return + self._update_called = True + + def update(self, *args, **kwargs) -> None: + self._num_updates += 1 + + def compute(self): + return tensor(self._val_to_return) + + def reset(self): + self._num_updates = 0 + return super().reset() + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(4)), + (2, tensor(4)), + (2.0, tensor(4.0)), + pytest.param(tensor(2), tensor(4), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], +) +def test_metrics_add(second_operand, expected_result): + first_metric = DummyMetric(2) + + final_add = first_metric + second_operand + final_radd = second_operand + first_metric + + assert isinstance(final_add, CompositionalMetric) + assert isinstance(final_radd, CompositionalMetric) + + final_add.update() + final_radd.update() + + assert B.allclose(expected_result, final_add.compute()) + assert B.allclose(expected_result, final_radd.compute()) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric(3), tensor(2)), (3, tensor(2)), (3, tensor(2)), (tensor(3), tensor(2))], +) +@pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) +def test_metrics_and(second_operand, expected_result): + first_metric = DummyMetric(2) + + final_and = first_metric & second_operand + final_rand = second_operand & first_metric + + assert isinstance(final_and, CompositionalMetric) + assert isinstance(final_rand, CompositionalMetric) + + final_and.update() + final_rand.update() + assert B.allclose(expected_result, final_and.compute()) + assert B.allclose(expected_result, final_rand.compute()) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(True)), + (2, tensor(True)), + (2.0, tensor(True)), + (tensor(2), tensor(True)), + ], +) +def test_metrics_eq(second_operand, expected_result): + first_metric = DummyMetric(2) + + final_eq = first_metric == second_operand + + assert isinstance(final_eq, CompositionalMetric) + + final_eq.update() + # can't use allclose for bool tensors + assert (expected_result == final_eq.compute()).all() + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(2)), + (2, tensor(2)), + (2.0, tensor(2.0)), + (tensor(2), tensor(2)), + ], +) +@pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) +def test_metrics_floordiv(second_operand, expected_result): + first_metric = DummyMetric(5) + + final_floordiv = first_metric // second_operand + + assert isinstance(final_floordiv, CompositionalMetric) + + final_floordiv.update() + assert B.allclose(expected_result, final_floordiv.compute()) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(True)), + (2, tensor(True)), + (2.0, tensor(True)), + (tensor(2), tensor(True)), + ], +) +def test_metrics_ge(second_operand, expected_result): + first_metric = DummyMetric(5) + + final_ge = first_metric >= second_operand + + assert isinstance(final_ge, CompositionalMetric) + + final_ge.update() + # can't use allclose for bool tensors + assert (expected_result == final_ge.compute()).all() + + +@pytest.mark.parametrize( + 
["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(True)), + (2, tensor(True)), + (2.0, tensor(True)), + (tensor(2), tensor(True)), + ], +) +def test_metrics_gt(second_operand, expected_result): + first_metric = DummyMetric(5) + + final_gt = first_metric > second_operand + + assert isinstance(final_gt, CompositionalMetric) + + final_gt.update() + # can't use allclose for bool tensors + assert (expected_result == final_gt.compute()).all() + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(False)), + (2, tensor(False)), + (2.0, tensor(False)), + (tensor(2), tensor(False)), + ], +) +def test_metrics_le(second_operand, expected_result): + first_metric = DummyMetric(5) + + final_le = first_metric <= second_operand + + assert isinstance(final_le, CompositionalMetric) + + final_le.update() + # can't use allclose for bool tensors + assert (expected_result == final_le.compute()).all() + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(False)), + (2, tensor(False)), + (2.0, tensor(False)), + (tensor(2), tensor(False)), + ], +) +def test_metrics_lt(second_operand, expected_result): + first_metric = DummyMetric(5) + + final_lt = first_metric < second_operand + + assert isinstance(final_lt, CompositionalMetric) + + final_lt.update() + # can't use allclose for bool tensors + assert (expected_result == final_lt.compute()).all() + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric([2, 2, 2]), tensor(12)), (tensor([2, 2, 2]), tensor(12))], +) +def test_metrics_matmul(second_operand, expected_result): + first_metric = DummyMetric([2, 2, 2]) + + final_matmul = first_metric @ second_operand + + assert isinstance(final_matmul, CompositionalMetric) + + final_matmul.update() + assert B.allclose(expected_result, final_matmul.compute()) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1)), + (tensor(2), tensor(1)), + ], +) +def test_metrics_mod(second_operand, expected_result): + first_metric = DummyMetric(5) + + final_mod = first_metric % second_operand + + assert isinstance(final_mod, CompositionalMetric) + + final_mod.update() + # prevent Runtime error for PT 1.8 - Long did not match Float + assert B.allclose(expected_result.to(float), final_mod.compute().to(float)) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(4)), + (2, tensor(4)), + (2.0, tensor(4.0)), + pytest.param(tensor(2), tensor(4), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], +) +def test_metrics_mul(second_operand, expected_result): + first_metric = DummyMetric(2) + + final_mul = first_metric * second_operand + final_rmul = second_operand * first_metric + + assert isinstance(final_mul, CompositionalMetric) + assert isinstance(final_rmul, CompositionalMetric) + + final_mul.update() + final_rmul.update() + assert B.allclose(expected_result, final_mul.compute()) + assert B.allclose(expected_result, final_rmul.compute()) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(False)), + (2, tensor(False)), + (2.0, tensor(False)), + (tensor(2), tensor(False)), + ], +) +def test_metrics_ne(second_operand, expected_result): + first_metric = DummyMetric(2) + + final_ne = first_metric != second_operand + + assert isinstance(final_ne, CompositionalMetric) + + final_ne.update() + # can't use allclose for bool 
tensors + assert (expected_result == final_ne.compute()).all() + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric([1, 0, 3]), tensor([-1, -2, 3])), (tensor([1, 0, 3]), tensor([-1, -2, 3]))], +) +@pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) +def test_metrics_or(second_operand, expected_result): + first_metric = DummyMetric([-1, -2, 3]) + + final_or = first_metric | second_operand + final_ror = second_operand | first_metric + + assert isinstance(final_or, CompositionalMetric) + assert isinstance(final_ror, CompositionalMetric) + + final_or.update() + final_ror.update() + assert B.allclose(expected_result, final_or.compute()) + assert B.allclose(expected_result, final_ror.compute()) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + pytest.param(DummyMetric(2), tensor(4)), + pytest.param(2, tensor(4)), + pytest.param(2.0, tensor(4.0), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_6)), + pytest.param(tensor(2), tensor(4)), + ], +) +def test_metrics_pow(second_operand, expected_result): + first_metric = DummyMetric(2) + + final_pow = first_metric ** second_operand + + assert isinstance(final_pow, CompositionalMetric) + + final_pow.update() + assert B.allclose(expected_result, final_pow.compute()) + + +@pytest.mark.parametrize( + ["first_operand", "expected_result"], + [(5, tensor(2)), (5.0, tensor(2.0)), (tensor(5), tensor(2))], +) +@pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) +def test_metrics_rfloordiv(first_operand, expected_result): + second_operand = DummyMetric(2) + + final_rfloordiv = first_operand // second_operand + + assert isinstance(final_rfloordiv, CompositionalMetric) + + final_rfloordiv.update() + assert B.allclose(expected_result, final_rfloordiv.compute()) + + +@pytest.mark.parametrize( + ["first_operand", "expected_result"], + [pytest.param(tensor([2, 2, 2]), tensor(12), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4))], +) +def test_metrics_rmatmul(first_operand, expected_result): + second_operand = DummyMetric([2, 2, 2]) + + final_rmatmul = first_operand @ second_operand + + assert isinstance(final_rmatmul, CompositionalMetric) + + final_rmatmul.update() + assert B.allclose(expected_result, final_rmatmul.compute()) + + +@pytest.mark.parametrize( + ["first_operand", "expected_result"], + [pytest.param(tensor(2), tensor(2), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4))], +) +def test_metrics_rmod(first_operand, expected_result): + second_operand = DummyMetric(5) + + final_rmod = first_operand % second_operand + + assert isinstance(final_rmod, CompositionalMetric) + + final_rmod.update() + assert B.allclose(expected_result, final_rmod.compute()) + + +@pytest.mark.parametrize( + "first_operand,expected_result", + [ + pytest.param(DummyMetric(2), tensor(4)), + pytest.param(2, tensor(4)), + pytest.param(2.0, tensor(4.0), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_6)), + ], +) +def test_metrics_rpow(first_operand, expected_result): + second_operand = DummyMetric(2) + + final_rpow = first_operand ** second_operand + + assert isinstance(final_rpow, CompositionalMetric) + final_rpow.update() + assert B.allclose(expected_result, final_rpow.compute()) + + +@pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(3), tensor(1)), + (3, tensor(1)), + (3.0, tensor(1.0)), + pytest.param(tensor(3), tensor(1), marks=pytest.mark.skipif(**_MARK_TORCH_MIN_1_4)), + ], +) +def test_metrics_rsub(first_operand, expected_result): + second_operand = DummyMetric(2) + + final_rsub = first_operand - second_operand 
+ + assert isinstance(final_rsub, CompositionalMetric) + final_rsub.update() + assert B.allclose(expected_result, final_rsub.compute()) + + +@pytest.mark.parametrize( + ["first_operand", "expected_result"], + [ + (DummyMetric(6), tensor(2.0)), + (6, tensor(2.0)), + (6.0, tensor(2.0)), + (tensor(6), tensor(2.0)), + ], +) +@pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) +def test_metrics_rtruediv(first_operand, expected_result): + second_operand = DummyMetric(3) + + final_rtruediv = first_operand / second_operand + + assert isinstance(final_rtruediv, CompositionalMetric) + final_rtruediv.update() + assert B.allclose(expected_result, final_rtruediv.compute()) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(2), tensor(1)), + (2, tensor(1)), + (2.0, tensor(1.0)), + (tensor(2), tensor(1)), + ], +) +def test_metrics_sub(second_operand, expected_result): + first_metric = DummyMetric(3) + + final_sub = first_metric - second_operand + + assert isinstance(final_sub, CompositionalMetric) + final_sub.update() + assert B.allclose(expected_result, final_sub.compute()) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [ + (DummyMetric(3), tensor(2.0)), + (3, tensor(2.0)), + (3.0, tensor(2.0)), + (tensor(3), tensor(2.0)), + ], +) +@pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) +def test_metrics_truediv(second_operand, expected_result): + first_metric = DummyMetric(6) + + final_truediv = first_metric / second_operand + + assert isinstance(final_truediv, CompositionalMetric) + final_truediv.update() + assert B.allclose(expected_result, final_truediv.compute()) + + +@pytest.mark.parametrize( + ["second_operand", "expected_result"], + [(DummyMetric([1, 0, 3]), tensor([-2, -2, 0])), (tensor([1, 0, 3]), tensor([-2, -2, 0]))], +) +@pytest.mark.skipif(**_MARK_TORCH_MIN_1_5) +def test_metrics_xor(second_operand, expected_result): + first_metric = DummyMetric([-1, -2, 3]) + + final_xor = first_metric ^ second_operand + final_rxor = second_operand ^ first_metric + + assert isinstance(final_xor, CompositionalMetric) + assert isinstance(final_rxor, CompositionalMetric) + + final_xor.update() + final_rxor.update() + assert B.allclose(expected_result, final_xor.compute()) + assert B.allclose(expected_result, final_rxor.compute()) + + +def test_metrics_abs(): + first_metric = DummyMetric(-1) + + final_abs = abs(first_metric) + + assert isinstance(final_abs, CompositionalMetric) + final_abs.update() + assert B.allclose(tensor(1), final_abs.compute()) + + +def test_metrics_invert(): + first_metric = DummyMetric(1) + + final_inverse = ~first_metric + assert isinstance(final_inverse, CompositionalMetric) + final_inverse.update() + assert B.allclose(tensor(-2), final_inverse.compute()) + + +def test_metrics_neg(): + first_metric = DummyMetric(1) + + final_neg = neg(first_metric) + assert isinstance(final_neg, CompositionalMetric) + final_neg.update() + assert B.allclose(tensor(-1), final_neg.compute()) + + +def test_metrics_pos(): + first_metric = DummyMetric(-1) + + final_pos = pos(first_metric) + assert isinstance(final_pos, CompositionalMetric) + final_pos.update() + assert B.allclose(tensor(1), final_pos.compute()) + + +@pytest.mark.parametrize( + ["value", "idx", "expected_result"], + [([1, 2, 3], 1, tensor(2)), ([[0, 1], [2, 3]], (1, 0), tensor(2)), ([[0, 1], [2, 3]], 1, tensor([2, 3]))], +) +def test_metrics_getitem(value, idx, expected_result): + first_metric = DummyMetric(value) + + final_getitem = first_metric[idx] + assert isinstance(final_getitem, 
CompositionalMetric) + final_getitem.update() + assert B.allclose(expected_result, final_getitem.compute()) + + +def test_compositional_metrics_update(): + + compos = DummyMetric(5) + DummyMetric(4) + + assert isinstance(compos, CompositionalMetric) + compos.update() + compos.update() + compos.update() + + assert isinstance(compos.metric_a, DummyMetric) + assert isinstance(compos.metric_b, DummyMetric) + + assert compos.metric_a._num_updates == 3 + assert compos.metric_b._num_updates == 3 diff --git a/RE/paddlemetric/src/tests/bases/test_ddp.py b/RE/paddlemetric/src/tests/bases/test_ddp.py new file mode 100644 index 00000000..7f713e1d --- /dev/null +++ b/RE/paddlemetric/src/tests/bases/test_ddp.py @@ -0,0 +1,241 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import sys +from copy import deepcopy + +import pytest +import paddleext.torchapi as B +from paddleext.torchapi import tensor + +from tests.helpers import seed_all +from tests.helpers.testers import DummyMetric, DummyMetricSum, setup_ddp +from paddlemetrics import Metric +from paddlemetrics.utilities.distributed import gather_all_tensors +from paddlemetrics.utilities.exceptions import paddlemetricsUserError + +seed_all(42) + + +def _test_ddp_sum(rank, worldsize): + setup_ddp(rank, worldsize) + dummy = DummyMetric() + dummy._reductions = {"foo": B.sum} + dummy.foo = tensor(1) + dummy._sync_dist() + + assert dummy.foo == worldsize + + +def _test_ddp_cat(rank, worldsize): + setup_ddp(rank, worldsize) + dummy = DummyMetric() + dummy._reductions = {"foo": B.cat} + dummy.foo = [tensor([1])] + dummy._sync_dist() + + assert B.all(B.eq(dummy.foo, tensor([1, 1]))) + + +def _test_ddp_sum_cat(rank, worldsize): + setup_ddp(rank, worldsize) + dummy = DummyMetric() + dummy._reductions = {"foo": B.cat, "bar": B.sum} + dummy.foo = [tensor([1])] + dummy.bar = tensor(1) + dummy._sync_dist() + + assert B.all(B.eq(dummy.foo, tensor([1, 1]))) + assert dummy.bar == worldsize + + +def _test_ddp_gather_uneven_tensors(rank, worldsize): + setup_ddp(rank, worldsize) + tensor = B.ones(rank) + result = gather_all_tensors(tensor) + assert len(result) == worldsize + for idx in range(worldsize): + assert len(result[idx]) == idx + assert (result[idx] == B.ones_like(result[idx])).all() + + +def _test_ddp_gather_uneven_tensors_multidim(rank, worldsize): + setup_ddp(rank, worldsize) + tensor = B.ones(rank + 1, 2 - rank) + result = gather_all_tensors(tensor) + assert len(result) == worldsize + for idx in range(worldsize): + val = result[idx] + assert val.shape == (idx + 1, 2 - idx) + assert (val == B.ones_like(val)).all() + + +def _test_ddp_compositional_tensor(rank, worldsize): + setup_ddp(rank, worldsize) + dummy = DummyMetricSum() + dummy._reductions = {"x": B.sum} + dummy = dummy.clone() + dummy.clone() + dummy.update(tensor(1)) + val = dummy.compute() + assert val == 2 * worldsize + + +@pytest.mark.skipif(sys.platform == "win32", reason="DDP not available on windows") +@pytest.mark.parametrize( + "process", + [ + 
_test_ddp_cat, + _test_ddp_sum, + _test_ddp_sum_cat, + _test_ddp_gather_uneven_tensors, + _test_ddp_gather_uneven_tensors_multidim, + _test_ddp_compositional_tensor, + ], +) +def test_ddp(process): + B.multiprocessing.spawn(process, args=(2,), nprocs=2) + + +def _test_non_contiguous_tensors(rank, worldsize): + setup_ddp(rank, worldsize) + + class DummyCatMetric(Metric): + def __init__(self): + super().__init__() + self.add_state("x", default=[], dist_reduce_fx=None) + + def update(self, x): + self.x.append(x) + + def compute(self): + x = B.cat(self.x, dim=0) + return x.sum() + + metric = DummyCatMetric() + metric.update(B.randn(10, 5)[:, 0]) + + +@pytest.mark.skipif(sys.platform == "win32", reason="DDP not available on windows") +def test_non_contiguous_tensors(): + """Test that gather_all operation works for non contiguous tensors.""" + B.multiprocessing.spawn(_test_non_contiguous_tensors, args=(2,), nprocs=2) + + +def _test_state_dict_is_synced(rank, worldsize, tmpdir): + setup_ddp(rank, worldsize) + + class DummyCatMetric(Metric): + def __init__(self): + super().__init__() + self.add_state("x", B.tensor(0), dist_reduce_fx=B.sum) + self.add_state("c", B.tensor(0), dist_reduce_fx=B.sum) + + def update(self, x): + self.x += x + self.c += 1 + + def compute(self): + return self.x // self.c + + def __repr__(self): + return f"DummyCatMetric(x={self.x}, c={self.c})" + + metric = DummyCatMetric() + metric.persistent(True) + + def verify_metric(metric, i, world_size): + state_dict = metric.state_dict() + exp_sum = i * (i + 1) / 2 + assert state_dict["x"] == exp_sum * world_size + assert metric.x == exp_sum * world_size + assert metric.c == (i + 1) * world_size + assert state_dict["c"] == metric.c + + steps = 5 + for i in range(steps): + + if metric._is_synced: + + with pytest.raises(paddlemetricsUserError, match="The Metric shouldn't be synced when performing"): + metric(i) + + metric.unsync() + + metric(i) + + verify_metric(metric, i, 1) + + metric.sync() + assert metric._is_synced + + with pytest.raises(paddlemetricsUserError, match="The Metric has already been synced."): + metric.sync() + + verify_metric(metric, i, 2) + + metric.unsync() + assert not metric._is_synced + + with pytest.raises(paddlemetricsUserError, match="The Metric has already been un-synced."): + metric.unsync() + + with metric.sync_context(): + assert metric._is_synced + verify_metric(metric, i, 2) + + with metric.sync_context(should_unsync=False): + assert metric._is_synced + verify_metric(metric, i, 2) + + assert metric._is_synced + + metric.unsync() + assert not metric._is_synced + + metric.sync() + cache = metric._cache + metric._cache = None + + with pytest.raises(paddlemetricsUserError, match="The internal cache should exist to unsync the Metric."): + metric.unsync() + + metric._cache = cache + + def reload_state_dict(state_dict, expected_x, expected_c): + metric = DummyCatMetric() + metric.load_state_dict(state_dict) + assert metric.x == expected_x + assert metric.c == expected_c + + reload_state_dict(deepcopy(metric.state_dict()), 20, 10) + + metric.unsync() + reload_state_dict(deepcopy(metric.state_dict()), 10, 5) + + metric.sync() + + filepath = os.path.join(tmpdir, f"weights-{rank}.pt") + + B.save(metric.state_dict(), filepath) + + metric.unsync() + with metric.sync_context(): + B.save(metric.state_dict(), filepath) + + +@pytest.mark.skipif(sys.platform == "win32", reason="DDP not available on windows") +def test_state_dict_is_synced(tmpdir): + """This test asserts that metrics are synced while creating the state 
dict but restored after to continue
+    accumulation."""
+    B.multiprocessing.spawn(_test_state_dict_is_synced, args=(2, tmpdir), nprocs=2)
diff --git a/RE/paddlemetric/src/tests/bases/test_hashing.py b/RE/paddlemetric/src/tests/bases/test_hashing.py
new file mode 100644
index 00000000..af77dbd1
--- /dev/null
+++ b/RE/paddlemetric/src/tests/bases/test_hashing.py
@@ -0,0 +1,22 @@
+import pytest
+
+from tests.helpers.testers import DummyListMetric, DummyMetric
+
+
+@pytest.mark.parametrize(
+    "metric_cls",
+    [
+        DummyMetric,
+        DummyListMetric,
+    ],
+)
+def test_metric_hashing(metric_cls):
+    """Tests that hashes are different.
+
+    See the Metric's hash function for details on why this is required.
+    """
+    instance_1 = metric_cls()
+    instance_2 = metric_cls()
+
+    assert hash(instance_1) != hash(instance_2)
+    assert id(instance_1) != id(instance_2)
diff --git a/RE/paddlemetric/src/tests/bases/test_metric.py b/RE/paddlemetric/src/tests/bases/test_metric.py
new file mode 100644
index 00000000..a57eeb80
--- /dev/null
+++ b/RE/paddlemetric/src/tests/bases/test_metric.py
@@ -0,0 +1,356 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import pickle
+from collections import OrderedDict
+
+import cloudpickle
+import numpy as np
+import pytest
+import pangu.core.backend as torch
+from pangu.core.backend import Tensor, nn, tensor
+
+from tests.helpers import _LIGHTNING_GREATER_EQUAL_1_3, seed_all
+from tests.helpers.testers import DummyListMetric, DummyMetric, DummyMetricMultiOutput, DummyMetricSum
+from paddlemetrics.utilities.imports import _LIGHTNING_AVAILABLE, _TORCH_LOWER_1_6
+
+seed_all(42)
+
+
+def test_inherit():
+    DummyMetric()
+
+
+def test_add_state():
+    a = DummyMetric()
+
+    a.add_state("a", tensor(0), "sum")
+    assert a._reductions["a"](tensor([1, 1])) == 2
+
+    a.add_state("b", tensor(0), "mean")
+    assert np.allclose(a._reductions["b"](tensor([1.0, 2.0])).numpy(), 1.5)
+
+    a.add_state("c", tensor(0), "cat")
+    assert a._reductions["c"]([tensor([1]), tensor([1])]).shape == (2,)
+
+    with pytest.raises(ValueError):
+        a.add_state("d1", tensor(0), "xyz")
+
+    with pytest.raises(ValueError):
+        a.add_state("d2", tensor(0), 42)
+
+    with pytest.raises(ValueError):
+        a.add_state("d3", [tensor(0)], "sum")
+
+    with pytest.raises(ValueError):
+        a.add_state("d4", 42, "sum")
+
+    def custom_fx(_):
+        return -1
+
+    a.add_state("e", tensor(0), custom_fx)
+    assert a._reductions["e"](tensor([1, 1])) == -1
+
+
+def test_add_state_persistent():
+    a = DummyMetric()
+
+    a.add_state("a", tensor(0), "sum", persistent=True)
+    assert "a" in a.state_dict()
+
+    a.add_state("b", tensor(0), "sum", persistent=False)
+
+    if _TORCH_LOWER_1_6:
+        assert "b" not in a.state_dict()
+
+
+def test_reset():
+    class A(DummyMetric):
+        pass
+
+    class B(DummyListMetric):
+        pass
+
+    a = A()
+    assert a.x == 0
+    a.x = tensor(5)
+    a.reset()
+    assert a.x == 0
+
+    b = B()
+    assert isinstance(b.x, list) and len(b.x) == 0
+    b.x = tensor(5)
+    b.reset()
+    assert isinstance(b.x, list) and len(b.x) == 0
+
+
+def
test_reset_compute(): + a = DummyMetricSum() + assert a.x == 0 + a.update(tensor(5)) + assert a.compute() == 5 + a.reset() + if not _LIGHTNING_AVAILABLE or _LIGHTNING_GREATER_EQUAL_1_3: + assert a.compute() == 0 + else: + assert a.compute() == 5 + + +def test_update(): + class A(DummyMetric): + def update(self, x): + self.x += x + + a = A() + assert a.x == 0 + assert a._computed is None + a.update(1) + assert a._computed is None + assert a.x == 1 + a.update(2) + assert a.x == 3 + assert a._computed is None + + +def test_compute(): + class A(DummyMetric): + def update(self, x): + self.x += x + + def compute(self): + return self.x + + a = A() + assert 0 == a.compute() + assert 0 == a.x + a.update(1) + assert a._computed is None + assert a.compute() == 1 + assert a._computed == 1 + a.update(2) + assert a._computed is None + assert a.compute() == 3 + assert a._computed == 3 + + # called without update, should return cached value + a._computed = 5 + assert a.compute() == 5 + + +def test_hash(): + class A(DummyMetric): + pass + + class B(DummyListMetric): + pass + + a1 = A() + a2 = A() + assert hash(a1) != hash(a2) + + b1 = B() + b2 = B() + assert hash(b1) != hash(b2) # different ids + assert isinstance(b1.x, list) and len(b1.x) == 0 + b1.x.append(tensor(5)) + assert isinstance(hash(b1), int) # <- check that nothing crashes + assert isinstance(b1.x, list) and len(b1.x) == 1 + b2.x.append(tensor(5)) + # Sanity: + assert isinstance(b2.x, list) and len(b2.x) == 1 + # Now that they have tensor contents, they should have different hashes: + assert hash(b1) != hash(b2) + + +def test_forward(): + class A(DummyMetric): + def update(self, x): + self.x += x + + def compute(self): + return self.x + + a = A() + assert a(5) == 5 + assert a._forward_cache == 5 + + assert a(8) == 8 + assert a._forward_cache == 8 + + assert a.compute() == 13 + + +def test_pickle(tmpdir): + # doesn't tests for DDP + a = DummyMetricSum() + a.update(1) + + metric_pickled = pickle.dumps(a) + metric_loaded = pickle.loads(metric_pickled) + + assert metric_loaded.compute() == 1 + + metric_loaded.update(5) + assert metric_loaded.compute() == 6 + + metric_pickled = cloudpickle.dumps(a) + metric_loaded = cloudpickle.loads(metric_pickled) + + assert metric_loaded.compute() == 1 + + +def test_state_dict(tmpdir): + """test that metric states can be removed and added to state dict.""" + metric = DummyMetric() + assert metric.state_dict() == OrderedDict() + metric.persistent(True) + assert metric.state_dict() == OrderedDict(x=0) + metric.persistent(False) + assert metric.state_dict() == OrderedDict() + + +def test_load_state_dict(tmpdir): + """test that metric states can be loaded with state dict.""" + metric = DummyMetricSum() + metric.persistent(True) + metric.update(5) + loaded_metric = DummyMetricSum() + loaded_metric.load_state_dict(metric.state_dict()) + assert metric.compute() == 5 + + +def test_child_metric_state_dict(): + """test that child metric states will be added to parent state dict.""" + + class TestModule(nn.Module): + def __init__(self): + super().__init__() + self.metric = DummyMetric() + self.metric.add_state("a", tensor(0), persistent=True) + self.metric.add_state("b", [], persistent=True) + self.metric.register_buffer("c", tensor(0)) + + module = TestModule() + expected_state_dict = { + "metric.a": tensor(0), + "metric.b": [], + "metric.c": tensor(0), + } + assert module.state_dict() == expected_state_dict + + +@pytest.mark.skipif(not B.cuda.is_available(), reason="Test requires GPU.") +def 
test_device_and_dtype_transfer(tmpdir): + metric = DummyMetricSum() + assert metric.x.is_cuda is False + assert metric.device == B.device("cpu") + assert metric.x.dtype == B.float32 + + metric = metric.to(device="cuda") + assert metric.x.is_cuda + assert metric.device == B.device("cuda", index=0) + + metric.set_dtype(B.double) + assert metric.x.dtype == B.float64 + metric.reset() + assert metric.x.dtype == B.float64 + + metric.set_dtype(B.half) + assert metric.x.dtype == B.float16 + metric.reset() + assert metric.x.dtype == B.float16 + + +def test_warning_on_compute_before_update(): + """test that an warning is raised if user tries to call compute before update.""" + metric = DummyMetricSum() + + # make sure everything is fine with forward + with pytest.warns(None) as record: + val = metric(1) + assert not record + + metric.reset() + + with pytest.warns(UserWarning, match=r"The ``compute`` method of metric .*"): + val = metric.compute() + assert val == 0.0 + + # after update things should be fine + metric.update(2.0) + with pytest.warns(None) as record: + val = metric.compute() + assert not record + assert val == 2.0 + + +def test_metric_scripts(): + """test that metrics are scriptable.""" + B.jit.script(DummyMetric()) + B.jit.script(DummyMetricSum()) + + +def test_metric_forward_cache_reset(): + """test that forward cache is reset when `reset` is called.""" + metric = DummyMetricSum() + _ = metric(2.0) + assert metric._forward_cache == 2.0 + metric.reset() + assert metric._forward_cache is None + + +@pytest.mark.skipif(not B.cuda.is_available(), reason="Test requires GPU.") +@pytest.mark.parametrize("metric_class", [DummyMetricSum, DummyMetricMultiOutput]) +def test_forward_and_compute_to_device(metric_class): + metric = metric_class() + metric(1) + metric.to(device="cuda") + + assert metric._forward_cache is not None + is_cuda = ( + metric._forward_cache[0].is_cuda if isinstance(metric._forward_cache, list) else metric._forward_cache.is_cuda + ) + assert is_cuda, "forward cache was not moved to the correct device" + + metric.compute() + assert metric._computed is not None + is_cuda = metric._computed[0].is_cuda if isinstance(metric._computed, list) else metric._computed.is_cuda + assert is_cuda, "computed result was not moved to the correct device" + + +@pytest.mark.skipif(not B.cuda.is_available(), reason="Test requires GPU.") +@pytest.mark.parametrize("metric_class", [DummyMetricSum, DummyMetricMultiOutput]) +def test_device_if_child_module(metric_class): + """Test that if a metric is a child module all values gets moved to the correct device.""" + + class TestModule(nn.Module): + def __init__(self): + super().__init__() + self.metric = metric_class() + self.register_buffer("dummy", B.zeros(1)) + + @property + def device(self): + return self.dummy.device + + module = TestModule() + + assert module.device == module.metric.device + if isinstance(module.metric.x, Tensor): + assert module.device == module.metric.x.device + + module.to(device="cuda") + + assert module.device == module.metric.device + if isinstance(module.metric.x, Tensor): + assert module.device == module.metric.x.device diff --git a/RE/paddlemetric/src/tests/classification/__init__.py b/RE/paddlemetric/src/tests/classification/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/classification/inputs.py b/RE/paddlemetric/src/tests/classification/inputs.py new file mode 100644 index 00000000..d0bf9488 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/inputs.py @@ -0,0 
+1,125 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple + +import paddleext.torchapi as B + +from tests.helpers.testers import BATCH_SIZE, EXTRA_DIM, NUM_BATCHES, NUM_CLASSES + +Input = namedtuple("Input", ["preds", "target"]) + +_input_binary_prob = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE), target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE)) +) + +_input_binary = Input( + preds=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE)), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE)), +) + +_input_binary_logits = Input( + preds=B.randn(NUM_BATCHES, BATCH_SIZE), target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE)) +) + +_input_multilabel_prob = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES)), +) + +_input_multilabel_multidim_prob = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, EXTRA_DIM), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, EXTRA_DIM)), +) + +_input_multilabel_logits = Input( + preds=B.randn(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES)), +) + +_input_multilabel = Input( + preds=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES)), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES)), +) + +_input_multilabel_multidim = Input( + preds=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, EXTRA_DIM)), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, EXTRA_DIM)), +) + +# Generate edge multilabel edge case, where nothing matches (scores are undefined) +__temp_preds = B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES)) +__temp_target = B.abs(__temp_preds - 1) + +_input_multilabel_no_match = Input(preds=__temp_preds, target=__temp_target) + +__mc_prob_logits = 10 * B.randn(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES) +__mc_prob_preds = __mc_prob_logits.abs() / __mc_prob_logits.abs().sum(dim=2, keepdim=True) + +_input_multiclass_prob = Input( + preds=__mc_prob_preds, target=B.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)) +) + +_input_multiclass_logits = Input( + preds=__mc_prob_logits, target=B.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)) +) + +_input_multiclass = Input( + preds=B.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), + target=B.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), +) + +__mdmc_prob_preds = B.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, EXTRA_DIM) +__mdmc_prob_preds = __mdmc_prob_preds / __mdmc_prob_preds.sum(dim=2, keepdim=True) + +_input_multidim_multiclass_prob = Input( + preds=__mdmc_prob_preds, target=B.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM)) +) + +_input_multidim_multiclass = Input( + preds=B.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM)), + target=B.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM)), +) + + +# 
Generate plausible-looking inputs +def generate_plausible_inputs_multilabel(num_classes=NUM_CLASSES, num_batches=NUM_BATCHES, batch_size=BATCH_SIZE): + correct_targets = B.randint(high=num_classes, size=(num_batches, batch_size)) + preds = B.rand(num_batches, batch_size, num_classes) + targets = B.zeros_like(preds, dtype=B.long) + for i in range(preds.shape[0]): + for j in range(preds.shape[1]): + targets[i, j, correct_targets[i, j]] = 1 + preds += B.rand(num_batches, batch_size, num_classes) * targets / 3 + + preds = preds / preds.sum(dim=2, keepdim=True) + + return Input(preds=preds, target=targets) + + +def generate_plausible_inputs_binary(num_batches=NUM_BATCHES, batch_size=BATCH_SIZE): + targets = B.randint(high=2, size=(num_batches, batch_size)) + preds = B.rand(num_batches, batch_size) + B.rand(num_batches, batch_size) * targets / 3 + return Input(preds=preds / (preds.max() + 0.01), target=targets) + + +_input_multilabel_prob_plausible = generate_plausible_inputs_multilabel() + +_input_binary_prob_plausible = generate_plausible_inputs_binary() + +# randomly remove one class from the input +_temp = B.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)) +_class_remove, _class_replace = B.multinomial(B.ones(NUM_CLASSES), num_samples=2, replacement=False) +_temp[_temp == _class_remove] = _class_replace + +_input_multiclass_with_missing_class = Input(_temp.clone(), _temp.clone()) diff --git a/RE/paddlemetric/src/tests/classification/test.log b/RE/paddlemetric/src/tests/classification/test.log new file mode 100644 index 00000000..3ea5a1d0 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test.log @@ -0,0 +1,451 @@ +============================= test session starts ============================== +platform darwin -- Python 3.8.12, pytest-7.1.2, pluggy-1.0.0 +rootdir: /Users/sun/Projects/oix/baidu/ccl/paddlemetric/src/tests/classification +plugins: hydra-core-1.1.0.dev5 +collected 816 items + +test_f_beta.py ..................ssssssssssssssssss..ssssssssssssssssss..ssssssssssssssssss..............................................................................................................ssssssssss................ss..................ss..................ss..................ss....ssssssssssssssssss..ssssssssssssssssss..ssssssssssssssssss..............................................................................................................ssssssssss................ss..................ss..................ss..................ss....ssssssssssssssssss..ssssssssssssssssss..ssssssssssssssssss..............................................................................................................ssssssssss................ss..................ss..................ss..................ss../Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/comet_ml/monkey_patching.py:19: DeprecationWarning: the imp module is deprecated in favour of importlib; see the module's documentation for alternative uses + import imp +/Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/comet_ml/monkey_patching.py:19: DeprecationWarning: the imp module is deprecated in favour of importlib; see the module's documentation for alternative uses + import imp +.................... 
+
+=============================== warnings summary ===============================
+../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:19
+  /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/proto/framework_pb2.py:19: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.
+    DESCRIPTOR = _descriptor.FileDescriptor(
+
+  [The same DeprecationWarning is raised once for every descriptor defined in framework_pb2.py, via the deprecated EnumDescriptor(), EnumValueDescriptor(), Descriptor() and FieldDescriptor() create functions; the repeated entries are omitted here.]
+
+../../../../../../../.envs/oix/lib/python3.8/site-packages/comet_ml/monkey_patching.py:19
+  /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/comet_ml/monkey_patching.py:19: DeprecationWarning: the imp module is deprecated in favour of importlib; see the module's documentation for alternative uses
+    import imp
+
+../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:276: 1 warning
+test_f_beta.py: 3304 warnings
+  /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:276: UserWarning: The dtype of left and right variables are not the same, left dtype is paddle.float32, but right dtype is paddle.int64, the right dtype will convert to paddle.float32
+    warnings.warn(
+
+../../../../../../../.envs/oix/lib/python3.8/site-packages/paddle/tensor/creation.py:125: 1 warning
+test_f_beta.py: 326 warnings
+  /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/tensor/creation.py:125: DeprecationWarning: `np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself. Doing this will not modify any behavior and is safe.
+  Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
+    if data.dtype == np.object:
+
+test_f_beta.py: 2012 warnings
+  /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/layers/tensor.py:657: UserWarning: paddle.assign doesn't support float64 input now due to current platform protobuf data limitation, we convert it to float32
+    warnings.warn(
+
+test_f_beta.py: 13026 warnings
+  /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/framework.py:1104: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.
+ Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations + elif dtype == np.bool: + +test_f_beta.py: 794 warnings + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:276: UserWarning: The dtype of left and right variables are not the same, left dtype is paddle.float32, but right dtype is paddle.int32, the right dtype will convert to paddle.float32 + warnings.warn( + +test_f_beta.py: 792 warnings + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:276: UserWarning: The dtype of left and right variables are not the same, left dtype is paddle.int64, but right dtype is paddle.int32, the right dtype will convert to paddle.int64 + warnings.warn( + +test_f_beta.py: 792 warnings + /Users/sun/Projects/.envs/oix/lib/python3.8/site-packages/paddle/fluid/dygraph/math_op_patch.py:276: UserWarning: The dtype of left and right variables are not the same, left dtype is paddle.int64, but right dtype is paddle.float32, the right dtype will convert to paddle.int64 + warnings.warn( + +-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html +========= 600 passed, 216 skipped, 21149 warnings in 85.12s (0:01:25) ========== diff --git a/RE/paddlemetric/src/tests/classification/test_accuracy.py b/RE/paddlemetric/src/tests/classification/test_accuracy.py new file mode 100644 index 00000000..26ec7876 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_accuracy.py @@ -0,0 +1,362 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
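+"""Tests for the paddlemetrics ``Accuracy`` metric.
+
+These tests exercise both the module-based ``Accuracy`` class and the functional
+``accuracy`` on the paddle backend (via ``paddleext.torchapi``), comparing the results
+against reference values computed with ``sklearn.metrics.accuracy_score``.
+"""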
+from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import accuracy_score as sk_accuracy +from paddleext.torchapi import tensor + +from tests.classification.inputs import _input_binary, _input_binary_logits, _input_binary_prob +from tests.classification.inputs import _input_multiclass as _input_mcls +from tests.classification.inputs import _input_multiclass_logits as _input_mcls_logits +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multiclass_with_missing_class as _input_miss_class +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mlb +from tests.classification.inputs import _input_multilabel_logits as _input_mlb_logits +from tests.classification.inputs import _input_multilabel_multidim as _input_mlmd +from tests.classification.inputs import _input_multilabel_multidim_prob as _input_mlmd_prob +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_BATCHES, NUM_CLASSES, THRESHOLD, MetricTester +from paddlemetrics import Accuracy +from paddlemetrics.functional import accuracy +from paddlemetrics.utilities.checks import _input_format_classification +from paddlemetrics.utilities.enums import AverageMethod, DataType + +seed_all(42) + + +def _sk_accuracy(preds, target, subset_accuracy): + sk_preds, sk_target, mode = _input_format_classification(preds, target, threshold=THRESHOLD) + sk_preds, sk_target = sk_preds.numpy(), sk_target.numpy() + + if mode == DataType.MULTIDIM_MULTICLASS and not subset_accuracy: + sk_preds, sk_target = np.transpose(sk_preds, (0, 2, 1)), np.transpose(sk_target, (0, 2, 1)) + sk_preds, sk_target = sk_preds.reshape(-1, sk_preds.shape[2]), sk_target.reshape(-1, sk_target.shape[2]) + elif mode == DataType.MULTIDIM_MULTICLASS and subset_accuracy: + return np.all(sk_preds == sk_target, axis=(1, 2)).mean() + elif mode == DataType.MULTILABEL and not subset_accuracy: + sk_preds, sk_target = sk_preds.reshape(-1), sk_target.reshape(-1) + + return sk_accuracy(y_true=sk_target, y_pred=sk_preds) + + +@pytest.mark.parametrize( + "preds, target, subset_accuracy", + [ + (_input_binary_logits.preds, _input_binary_logits.target, False), + (_input_binary_prob.preds, _input_binary_prob.target, False), + (_input_binary.preds, _input_binary.target, False), + (_input_mlb_prob.preds, _input_mlb_prob.target, True), + (_input_mlb_logits.preds, _input_mlb_logits.target, False), + (_input_mlb_prob.preds, _input_mlb_prob.target, False), + (_input_mlb.preds, _input_mlb.target, True), + (_input_mlb.preds, _input_mlb.target, False), + (_input_mcls_prob.preds, _input_mcls_prob.target, False), + (_input_mcls_logits.preds, _input_mcls_logits.target, False), + (_input_mcls.preds, _input_mcls.target, False), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, False), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, True), + (_input_mdmc.preds, _input_mdmc.target, False), + (_input_mdmc.preds, _input_mdmc.target, True), + (_input_mlmd_prob.preds, _input_mlmd_prob.target, True), + (_input_mlmd_prob.preds, _input_mlmd_prob.target, False), + (_input_mlmd.preds, _input_mlmd.target, True), + (_input_mlmd.preds, _input_mlmd.target, False), + ], +) +class 
TestAccuracies(MetricTester): + @pytest.mark.parametrize("ddp", [False]) + @pytest.mark.parametrize("dist_sync_on_step", [False]) + def test_accuracy_class(self, ddp, dist_sync_on_step, preds, target, subset_accuracy): + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=Accuracy, + sk_metric=partial(_sk_accuracy, subset_accuracy=subset_accuracy), + dist_sync_on_step=dist_sync_on_step, + metric_args={"threshold": THRESHOLD, "subset_accuracy": subset_accuracy}, + ) + + def test_accuracy_fn(self, preds, target, subset_accuracy): + self.run_functional_metric_test( + preds, + target, + metric_functional=accuracy, + sk_metric=partial(_sk_accuracy, subset_accuracy=subset_accuracy), + metric_args={"threshold": THRESHOLD, "subset_accuracy": subset_accuracy}, + ) + + def test_accuracy_differentiability(self, preds, target, subset_accuracy): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=Accuracy, + metric_functional=accuracy, + metric_args={"threshold": THRESHOLD, "subset_accuracy": subset_accuracy}, + ) + + +_l1to4 = [0.1, 0.2, 0.3, 0.4] +_l1to4t3 = np.array([_l1to4, _l1to4, _l1to4]) +_l1to4t3_mcls = [_l1to4t3.T, _l1to4t3.T, _l1to4t3.T] + +# The preds in these examples always put highest probability on class 3, second highest on class 2, +# third highest on class 1, and lowest on class 0 +_topk_preds_mcls = tensor([_l1to4t3, _l1to4t3]).float() +_topk_target_mcls = tensor([[1, 2, 3], [2, 1, 0]]) + +# This is like for MC case, but one sample in each batch is sabotaged with 0 class prediction :) +_topk_preds_mdmc = tensor([_l1to4t3_mcls, _l1to4t3_mcls]).float() +_topk_target_mdmc = tensor([[[1, 1, 0], [2, 2, 2], [3, 3, 3]], [[2, 2, 0], [1, 1, 1], [0, 0, 0]]]) + +# Multilabel +_ml_t1 = [0.8, 0.2, 0.8, 0.2] +_ml_t2 = [_ml_t1, _ml_t1] +_ml_ta2 = [[1, 0, 1, 1], [0, 1, 1, 0]] +_av_preds_ml = tensor([_ml_t2, _ml_t2]).float() +_av_target_ml = tensor([_ml_ta2, _ml_ta2]) + + +# Replace with a proper sk_metric test once sklearn 0.24 hits :) +@pytest.mark.parametrize( + "preds, target, exp_result, k, subset_accuracy", + [ + (_topk_preds_mcls, _topk_target_mcls, 1 / 6, 1, False), + (_topk_preds_mcls, _topk_target_mcls, 3 / 6, 2, False), + (_topk_preds_mcls, _topk_target_mcls, 5 / 6, 3, False), + (_topk_preds_mcls, _topk_target_mcls, 1 / 6, 1, True), + (_topk_preds_mcls, _topk_target_mcls, 3 / 6, 2, True), + (_topk_preds_mcls, _topk_target_mcls, 5 / 6, 3, True), + (_topk_preds_mdmc, _topk_target_mdmc, 1 / 6, 1, False), + (_topk_preds_mdmc, _topk_target_mdmc, 8 / 18, 2, False), + (_topk_preds_mdmc, _topk_target_mdmc, 13 / 18, 3, False), + (_topk_preds_mdmc, _topk_target_mdmc, 1 / 6, 1, True), + (_topk_preds_mdmc, _topk_target_mdmc, 2 / 6, 2, True), + (_topk_preds_mdmc, _topk_target_mdmc, 3 / 6, 3, True), + (_av_preds_ml, _av_target_ml, 5 / 8, None, False), + (_av_preds_ml, _av_target_ml, 0, None, True), + ], +) +def test_topk_accuracy(preds, target, exp_result, k, subset_accuracy): + topk = Accuracy(top_k=k, subset_accuracy=subset_accuracy) + + for batch in range(preds.shape[0]): + topk(preds[batch], target[batch]) + + assert topk.compute() == exp_result + + # Test functional + total_samples = target.shape[0] * target.shape[1] + + preds = preds.view(total_samples, 4, -1) + target = target.view(total_samples, -1) + + assert accuracy(preds, target, top_k=k, subset_accuracy=subset_accuracy) == exp_result + + +# Only MC and MDMC with probs input type should be accepted for top_k +@pytest.mark.parametrize( + "preds, target", + [ + 
(_input_binary_prob.preds, _input_binary_prob.target), + (_input_binary.preds, _input_binary.target), + (_input_mlb_prob.preds, _input_mlb_prob.target), + (_input_mlb.preds, _input_mlb.target), + (_input_mcls.preds, _input_mcls.target), + (_input_mdmc.preds, _input_mdmc.target), + (_input_mlmd_prob.preds, _input_mlmd_prob.target), + (_input_mlmd.preds, _input_mlmd.target), + ], +) +def test_topk_accuracy_wrong_input_types(preds, target): + topk = Accuracy(top_k=1) + + with pytest.raises(ValueError): + topk(preds[0], target[0]) + + with pytest.raises(ValueError): + accuracy(preds[0], target[0], top_k=1) + + +@pytest.mark.parametrize( + "average, mdmc_average, num_classes, inputs, ignore_index, top_k, threshold", + [ + ("unknown", None, None, _input_binary, None, None, 0.5), + ("micro", "unknown", None, _input_binary, None, None, 0.5), + ("macro", None, None, _input_binary, None, None, 0.5), + ("micro", None, None, _input_mdmc_prob, None, None, 0.5), + ("micro", None, None, _input_binary_prob, 0, None, 0.5), + ("micro", None, None, _input_mcls_prob, NUM_CLASSES, None, 0.5), + ("micro", None, NUM_CLASSES, _input_mcls_prob, NUM_CLASSES, None, 0.5), + (None, None, None, _input_mcls_prob, None, 0, 0.5), + (None, None, None, _input_mcls_prob, None, None, 1.5), + ], +) +def test_wrong_params(average, mdmc_average, num_classes, inputs, ignore_index, top_k, threshold): + preds, target = inputs.preds, inputs.target + + with pytest.raises(ValueError): + acc = Accuracy( + average=average, + mdmc_average=mdmc_average, + num_classes=num_classes, + ignore_index=ignore_index, + threshold=threshold, + top_k=top_k, + ) + acc(preds[0], target[0]) + acc.compute() + + with pytest.raises(ValueError): + accuracy( + preds[0], + target[0], + average=average, + mdmc_average=mdmc_average, + num_classes=num_classes, + ignore_index=ignore_index, + threshold=threshold, + top_k=top_k, + ) + + +@pytest.mark.parametrize( + "preds_mc, target_mc, preds_ml, target_ml", + [ + ( + tensor([0, 1, 1, 1]), + tensor([2, 2, 1, 1]), + tensor([[0.8, 0.2, 0.8, 0.7], [0.6, 0.4, 0.6, 0.5]]), + tensor([[1, 0, 1, 1], [0, 0, 1, 0]]), + ) + ], +) +def test_different_modes(preds_mc, target_mc, preds_ml, target_ml): + acc = Accuracy() + acc(preds_mc, target_mc) + with pytest.raises(ValueError, match="^[You cannot use]"): + acc(preds_ml, target_ml) + + +_bin_t1 = [0.7, 0.6, 0.2, 0.1] +_av_preds_bin = tensor([_bin_t1, _bin_t1]).float() +_av_target_bin = tensor([[1, 0, 0, 0], [0, 1, 1, 0]]) + + +@pytest.mark.parametrize( + "preds, target, num_classes, exp_result, average, mdmc_average", + [ + (_topk_preds_mcls, _topk_target_mcls, 4, 1 / 4, "macro", None), + (_topk_preds_mcls, _topk_target_mcls, 4, 1 / 6, "weighted", None), + (_topk_preds_mcls, _topk_target_mcls, 4, [0.0, 0.0, 0.0, 1.0], "none", None), + (_topk_preds_mcls, _topk_target_mcls, 4, 1 / 6, "samples", None), + (_topk_preds_mdmc, _topk_target_mdmc, 4, 1 / 24, "macro", "samplewise"), + (_topk_preds_mdmc, _topk_target_mdmc, 4, 1 / 6, "weighted", "samplewise"), + (_topk_preds_mdmc, _topk_target_mdmc, 4, [0.0, 0.0, 0.0, 1 / 6], "none", "samplewise"), + (_topk_preds_mdmc, _topk_target_mdmc, 4, 1 / 6, "samples", "samplewise"), + (_topk_preds_mdmc, _topk_target_mdmc, 4, 1 / 6, "samples", "global"), + (_av_preds_ml, _av_target_ml, 4, 5 / 8, "macro", None), + (_av_preds_ml, _av_target_ml, 4, 0.70000005, "weighted", None), + (_av_preds_ml, _av_target_ml, 4, [1 / 2, 1 / 2, 1.0, 1 / 2], "none", None), + (_av_preds_ml, _av_target_ml, 4, 5 / 8, "samples", None), + ], +) +def 
test_average_accuracy(preds, target, num_classes, exp_result, average, mdmc_average): + acc = Accuracy(num_classes=num_classes, average=average, mdmc_average=mdmc_average) + + for batch in range(preds.shape[0]): + acc(preds[batch], target[batch]) + + assert B.allclose(acc.compute(), tensor(exp_result)) + + # Test functional + total_samples = target.shape[0] * target.shape[1] + + preds = preds.view(total_samples, num_classes, -1) + target = target.view(total_samples, -1) + + acc_score = accuracy(preds, target, num_classes=num_classes, average=average, mdmc_average=mdmc_average) + assert B.allclose(acc_score, tensor(exp_result)) + + +@pytest.mark.parametrize( + "preds, target, num_classes, exp_result, average, multiclass", + [ + (_av_preds_bin, _av_target_bin, 2, 19 / 30, "macro", True), + (_av_preds_bin, _av_target_bin, 2, 5 / 8, "weighted", True), + (_av_preds_bin, _av_target_bin, 2, [3 / 5, 2 / 3], "none", True), + (_av_preds_bin, _av_target_bin, 2, 5 / 8, "samples", True), + ], +) +def test_average_accuracy_bin(preds, target, num_classes, exp_result, average, multiclass): + acc = Accuracy(num_classes=num_classes, average=average, multiclass=multiclass) + + for batch in range(preds.shape[0]): + acc(preds[batch], target[batch]) + + assert (acc.compute() == tensor(exp_result)).all() + + # Test functional + total_samples = target.shape[0] * target.shape[1] + + preds = preds.view(total_samples, -1) + target = target.view(total_samples, -1) + acc_score = accuracy(preds, target, num_classes=num_classes, average=average, multiclass=multiclass) + assert (acc_score == tensor(exp_result)).all() + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Accuracy, accuracy)]) +@pytest.mark.parametrize( + "ignore_index, expected", [(None, B.tensor([1.0, np.nan])), (0, B.tensor([np.nan, np.nan]))] +) +def test_class_not_present(metric_class, metric_fn, ignore_index, expected): + """This tests that when metric is computed per class and a given class is not present in both the `preds` and + `target`, the resulting score is `nan`.""" + preds = B.tensor([0, 0, 0]) + target = B.tensor([0, 0, 0]) + num_classes = 2 + + # test functional + result_fn = metric_fn(preds, target, average=AverageMethod.NONE, num_classes=num_classes, ignore_index=ignore_index) + assert B.allclose(expected, result_fn, equal_nan=True) + + # test class + cl_metric = metric_class(average=AverageMethod.NONE, num_classes=num_classes, ignore_index=ignore_index) + cl_metric(preds, target) + result_cl = cl_metric.compute() + assert B.allclose(expected, result_cl, equal_nan=True) + + +@pytest.mark.parametrize("average", ["micro", "macro", "weighted"]) +def test_same_input(average): + preds = _input_miss_class.preds + target = _input_miss_class.target + preds_flat = B.cat(list(preds), dim=0) + target_flat = B.cat(list(target), dim=0) + + mc = Accuracy(num_classes=NUM_CLASSES, average=average) + for i in range(NUM_BATCHES): + mc.update(preds[i], target[i]) + class_res = mc.compute() + func_res = accuracy(preds_flat, target_flat, num_classes=NUM_CLASSES, average=average) + sk_res = sk_accuracy(target_flat, preds_flat) + + assert B.allclose(class_res, B.tensor(sk_res).float()) + assert B.allclose(func_res, B.tensor(sk_res).float()) diff --git a/RE/paddlemetric/src/tests/classification/test_auc.py b/RE/paddlemetric/src/tests/classification/test_auc.py new file mode 100644 index 00000000..df6e3ff7 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_auc.py @@ -0,0 +1,106 @@ +# Copyright The PyTorch Lightning team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple +from functools import partial + +import numpy as np +import pytest +from sklearn.metrics import auc as _sk_auc +from paddleext.torchapi import tensor + +from tests.helpers import seed_all +from tests.helpers.testers import NUM_BATCHES, MetricTester +from paddlemetrics.classification.auc import AUC +from paddlemetrics.functional import auc + +seed_all(42) + + +def sk_auc(x, y, reorder=False): + x = x.flatten() + y = y.flatten() + if reorder: + idx = np.argsort(x, kind="stable") + x = x[idx] + y = y[idx] + return _sk_auc(x, y) + + +Input = namedtuple("Input", ["x", "y"]) + +_examples = [] +# generate already ordered samples, sorted in both directions +for batch_size in (8, 4049): + for i in range(4): + x = np.random.rand(NUM_BATCHES * batch_size) + y = np.random.rand(NUM_BATCHES * batch_size) + idx = np.argsort(x, kind="stable") + x = x[idx] if i % 2 == 0 else x[idx[::-1]] + y = y[idx] if i % 2 == 0 else x[idx[::-1]] + x = x.reshape(NUM_BATCHES, batch_size) + y = y.reshape(NUM_BATCHES, batch_size) + _examples.append(Input(x=tensor(x), y=tensor(y))) + + +@pytest.mark.parametrize("x, y", _examples) +class TestAUC(MetricTester): + @pytest.mark.parametrize("ddp", [False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_auc(self, x, y, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp=ddp, + preds=x, + target=y, + metric_class=AUC, + sk_metric=sk_auc, + dist_sync_on_step=dist_sync_on_step, + ) + + @pytest.mark.parametrize("reorder", [True, False]) + def test_auc_functional(self, x, y, reorder): + self.run_functional_metric_test( + x, y, metric_functional=auc, sk_metric=partial(sk_auc, reorder=reorder), metric_args={"reorder": reorder} + ) + + @pytest.mark.parametrize("reorder", [True, False]) + def test_auc_differentiability(self, x, y, reorder): + self.run_differentiability_test( + preds=x, target=y, metric_module=AUC, metric_functional=auc, metric_args={"reorder": reorder} + ) + + +@pytest.mark.parametrize("unsqueeze_x", (True, False)) +@pytest.mark.parametrize("unsqueeze_y", (True, False)) +@pytest.mark.parametrize( + ["x", "y", "expected"], + [ + pytest.param([0, 1], [0, 1], 0.5), + pytest.param([1, 0], [0, 1], 0.5), + pytest.param([1, 0, 0], [0, 1, 1], 0.5), + pytest.param([0, 1], [1, 1], 1), + pytest.param([0, 0.5, 1], [0, 0.5, 1], 0.5), + ], +) +def test_auc(x, y, expected, unsqueeze_x, unsqueeze_y): + x = tensor(x) + y = tensor(y) + + if unsqueeze_x: + x = x.unsqueeze(-1) + + if unsqueeze_y: + y = y.unsqueeze(-1) + + # Test Area Under Curve (AUC) computation + assert auc(x, y, reorder=True) == expected diff --git a/RE/paddlemetric/src/tests/classification/test_auroc.py b/RE/paddlemetric/src/tests/classification/test_auroc.py new file mode 100644 index 00000000..36b43611 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_auroc.py @@ -0,0 +1,218 @@ +# Copyright The PyTorch Lightning team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from functools import partial + +import pytest +import paddleext.torchapi as B +from sklearn.metrics import roc_auc_score as sk_roc_auc_score + +from tests.classification.inputs import _input_binary_prob +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel_multidim_prob as _input_mlmd_prob +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, MetricTester +from paddlemetrics.classification.auroc import AUROC +from paddlemetrics.functional import auroc +from paddlemetrics.utilities.imports import _TORCH_LOWER_1_6 + +seed_all(42) + + +def _sk_auroc_binary_prob(preds, target, num_classes, average="macro", max_fpr=None, multi_class="ovr"): + # todo: `multi_class` is unused + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + return sk_roc_auc_score(y_true=sk_target, y_score=sk_preds, average=average, max_fpr=max_fpr) + + +def _sk_auroc_multiclass_prob(preds, target, num_classes, average="macro", max_fpr=None, multi_class="ovr"): + sk_preds = preds.reshape(-1, num_classes).numpy() + sk_target = target.view(-1).numpy() + return sk_roc_auc_score( + y_true=sk_target, + y_score=sk_preds, + average=average, + max_fpr=max_fpr, + multi_class=multi_class, + ) + + +def _sk_auroc_multidim_multiclass_prob(preds, target, num_classes, average="macro", max_fpr=None, multi_class="ovr"): + sk_preds = preds.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1).numpy() + sk_target = target.view(-1).numpy() + return sk_roc_auc_score( + y_true=sk_target, + y_score=sk_preds, + average=average, + max_fpr=max_fpr, + multi_class=multi_class, + ) + + +def _sk_auroc_multilabel_prob(preds, target, num_classes, average="macro", max_fpr=None, multi_class="ovr"): + sk_preds = preds.reshape(-1, num_classes).numpy() + sk_target = target.reshape(-1, num_classes).numpy() + return sk_roc_auc_score( + y_true=sk_target, + y_score=sk_preds, + average=average, + max_fpr=max_fpr, + multi_class=multi_class, + ) + + +def _sk_auroc_multilabel_multidim_prob(preds, target, num_classes, average="macro", max_fpr=None, multi_class="ovr"): + sk_preds = preds.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1).numpy() + sk_target = target.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1).numpy() + return sk_roc_auc_score( + y_true=sk_target, + y_score=sk_preds, + average=average, + max_fpr=max_fpr, + multi_class=multi_class, + ) + + +@pytest.mark.parametrize("average", ["macro", "weighted", "micro"]) +@pytest.mark.parametrize("max_fpr", [None, 0.8, 0.5]) +@pytest.mark.parametrize( + "preds, target, sk_metric, num_classes", + [ + (_input_binary_prob.preds, _input_binary_prob.target, _sk_auroc_binary_prob, 1), + (_input_mcls_prob.preds, _input_mcls_prob.target, 
_sk_auroc_multiclass_prob, NUM_CLASSES), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_auroc_multidim_multiclass_prob, NUM_CLASSES), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_auroc_multilabel_prob, NUM_CLASSES), + (_input_mlmd_prob.preds, _input_mlmd_prob.target, _sk_auroc_multilabel_multidim_prob, NUM_CLASSES), + ], +) +class TestAUROC(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_auroc(self, preds, target, sk_metric, num_classes, average, max_fpr, ddp, dist_sync_on_step): + # max_fpr different from None is not support in multi class + if max_fpr is not None and num_classes != 1: + pytest.skip("max_fpr parameter not support for multi class or multi label") + + # max_fpr only supported for torch v1.6 or higher + if max_fpr is not None and _TORCH_LOWER_1_6: + pytest.skip("requires torch v1.6 or higher to test max_fpr argument") + + # average='micro' only supported for multilabel + if average == "micro" and preds.ndim > 2 and preds.ndim == target.ndim + 1: + pytest.skip("micro argument only support for multilabel input") + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=AUROC, + sk_metric=partial(sk_metric, num_classes=num_classes, average=average, max_fpr=max_fpr), + dist_sync_on_step=dist_sync_on_step, + metric_args={"num_classes": num_classes, "average": average, "max_fpr": max_fpr}, + ) + + def test_auroc_functional(self, preds, target, sk_metric, num_classes, average, max_fpr): + # max_fpr different from None is not support in multi class + if max_fpr is not None and num_classes != 1: + pytest.skip("max_fpr parameter not support for multi class or multi label") + + # max_fpr only supported for torch v1.6 or higher + if max_fpr is not None and _TORCH_LOWER_1_6: + pytest.skip("requires torch v1.6 or higher to test max_fpr argument") + + # average='micro' only supported for multilabel + if average == "micro" and preds.ndim > 2 and preds.ndim == target.ndim + 1: + pytest.skip("micro argument only support for multilabel input") + + self.run_functional_metric_test( + preds, + target, + metric_functional=auroc, + sk_metric=partial(sk_metric, num_classes=num_classes, average=average, max_fpr=max_fpr), + metric_args={"num_classes": num_classes, "average": average, "max_fpr": max_fpr}, + ) + + def test_auroc_differentiability(self, preds, target, sk_metric, num_classes, average, max_fpr): + # max_fpr different from None is not support in multi class + if max_fpr is not None and num_classes != 1: + pytest.skip("max_fpr parameter not support for multi class or multi label") + + # max_fpr only supported for torch v1.6 or higher + if max_fpr is not None and _TORCH_LOWER_1_6: + pytest.skip("requires torch v1.6 or higher to test max_fpr argument") + + # average='micro' only supported for multilabel + if average == "micro" and preds.ndim > 2 and preds.ndim == target.ndim + 1: + pytest.skip("micro argument only support for multilabel input") + + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=AUROC, + metric_functional=auroc, + metric_args={"num_classes": num_classes, "average": average, "max_fpr": max_fpr}, + ) + + +def test_error_on_different_mode(): + """test that an error is raised if the user pass in data of different modes (binary, multi-label, multi- + class)""" + metric = AUROC() + # pass in multi-class data + metric.update(B.randn(10, 5).softmax(dim=-1), B.randint(0, 5, (10,))) + with pytest.raises(ValueError, 
match=r"The mode of data.* should be constant.*"): + # pass in multi-label data + metric.update(B.rand(10, 5), B.randint(0, 2, (10, 5))) + + +def test_error_multiclass_no_num_classes(): + with pytest.raises( + ValueError, match="Detected input to `multiclass` but you did not provide `num_classes` argument" + ): + _ = auroc(B.randn(20, 3).softmax(dim=-1), B.randint(3, (20,))) + + +def test_weighted_with_empty_classes(): + """Tests that weighted multiclass AUROC calculation yields the same results if a new but empty class exists. + + Tests that the proper warnings and errors are raised + """ + preds = B.tensor( + [ + [0.90, 0.05, 0.05], + [0.05, 0.90, 0.05], + [0.05, 0.05, 0.90], + [0.85, 0.05, 0.10], + [0.10, 0.10, 0.80], + ] + ) + target = B.tensor([0, 1, 1, 2, 2]) + num_classes = 3 + _auroc = auroc(preds, target, average="weighted", num_classes=num_classes) + + # Add in a class with zero observations at second to last index + preds = B.cat( + (preds[:, : num_classes - 1], B.rand_like(preds[:, 0:1]), preds[:, num_classes - 1 :]), axis=1 + ) + # Last class (2) gets moved to 3 + target[target == num_classes - 1] = num_classes + with pytest.warns(UserWarning, match="Class 2 had 0 observations, omitted from AUROC calculation"): + _auroc_empty_class = auroc(preds, target, average="weighted", num_classes=num_classes + 1) + assert _auroc == _auroc_empty_class + + target = B.zeros_like(target) + with pytest.raises(ValueError, match="Found 1 non-empty class in `multiclass` AUROC calculation"): + _ = auroc(preds, target, average="weighted", num_classes=num_classes + 1) diff --git a/RE/paddlemetric/src/tests/classification/test_average_precision.py b/RE/paddlemetric/src/tests/classification/test_average_precision.py new file mode 100644 index 00000000..aea088cc --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_average_precision.py @@ -0,0 +1,170 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from functools import partial + +import numpy as np +import pytest +from sklearn.metrics import average_precision_score as sk_average_precision_score +from paddleext.torchapi import tensor + +from tests.classification.inputs import _input_binary_prob +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, MetricTester +from paddlemetrics.classification.average_precision import AveragePrecision +from paddlemetrics.functional import average_precision + +seed_all(42) + + +def _sk_average_precision_score(y_true, probas_pred, num_classes=1, average=None): + if num_classes == 1: + return sk_average_precision_score(y_true, probas_pred) + + res = [] + for i in range(num_classes): + y_true_temp = np.zeros_like(y_true) + y_true_temp[y_true == i] = 1 + res.append(sk_average_precision_score(y_true_temp, probas_pred[:, i])) + + if average == "macro": + return np.array(res).mean() + if average == "weighted": + weights = np.bincount(y_true) if y_true.max() > 1 else y_true.sum(axis=0) + weights = weights / sum(weights) + return (np.array(res) * weights).sum() + + return res + + +def _sk_avg_prec_binary_prob(preds, target, num_classes=1, average=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return _sk_average_precision_score(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes, average=average) + + +def _sk_avg_prec_multiclass_prob(preds, target, num_classes=1, average=None): + sk_preds = preds.reshape(-1, num_classes).numpy() + sk_target = target.view(-1).numpy() + + return _sk_average_precision_score(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes, average=average) + + +def _sk_avg_prec_multilabel_prob(preds, target, num_classes=1, average=None): + sk_preds = preds.reshape(-1, num_classes).numpy() + sk_target = target.view(-1, num_classes).numpy() + return sk_average_precision_score(sk_target, sk_preds, average=average) + + +def _sk_avg_prec_multidim_multiclass_prob(preds, target, num_classes=1, average=None): + sk_preds = preds.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1).numpy() + sk_target = target.view(-1).numpy() + return _sk_average_precision_score(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes, average=average) + + +@pytest.mark.parametrize( + "preds, target, sk_metric, num_classes", + [ + (_input_binary_prob.preds, _input_binary_prob.target, _sk_avg_prec_binary_prob, 1), + (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_avg_prec_multiclass_prob, NUM_CLASSES), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_avg_prec_multidim_multiclass_prob, NUM_CLASSES), + (_input_multilabel.preds, _input_multilabel.target, _sk_avg_prec_multilabel_prob, NUM_CLASSES), + ], +) +@pytest.mark.parametrize("average", ["micro", "macro", "weighted", None]) +class TestAveragePrecision(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_average_precision(self, preds, target, sk_metric, num_classes, average, ddp, dist_sync_on_step): + if target.max() > 1 and average == "micro": + pytest.skip("average=micro and multiclass input cannot be used together") + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=AveragePrecision, + 
sk_metric=partial(sk_metric, num_classes=num_classes, average=average), + dist_sync_on_step=dist_sync_on_step, + metric_args={"num_classes": num_classes, "average": average}, + ) + + def test_average_precision_functional(self, preds, target, sk_metric, num_classes, average): + if target.max() > 1 and average == "micro": + pytest.skip("average=micro and multiclass input cannot be used together") + + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=average_precision, + sk_metric=partial(sk_metric, num_classes=num_classes, average=average), + metric_args={"num_classes": num_classes, "average": average}, + ) + + def test_average_precision_differentiability(self, preds, sk_metric, target, num_classes, average): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=AveragePrecision, + metric_functional=average_precision, + metric_args={"num_classes": num_classes}, + ) + + +@pytest.mark.parametrize( + ["scores", "target", "expected_score"], + [ + # Check the average_precision_score of a constant predictor is + # the TPR + # Generate a dataset with 25% of positives + # And a constant score + # The precision is then the fraction of positive whatever the recall + # is, as there is only one threshold: + pytest.param(tensor([1, 1, 1, 1]), tensor([0, 0, 0, 1]), 0.25), + # With threshold 0.8 : 1 TP and 2 TN and one FN + pytest.param(tensor([0.6, 0.7, 0.8, 9]), tensor([1, 0, 0, 1]), 0.75), + ], +) +def test_average_precision(scores, target, expected_score): + assert average_precision(scores, target) == expected_score + + +def test_average_precision_warnings_and_errors(): + """Test that the correct errors and warnings gets raised.""" + + # check average argument + with pytest.raises(ValueError, match="Expected argument `average` to be one .*"): + AveragePrecision(num_classes=5, average="samples") + + # check that micro average cannot be used with multilabel input + pred = tensor( + [ + [0.75, 0.05, 0.05, 0.05, 0.05], + [0.05, 0.75, 0.05, 0.05, 0.05], + [0.05, 0.05, 0.75, 0.05, 0.05], + [0.05, 0.05, 0.05, 0.75, 0.05], + ] + ) + target = tensor([0, 1, 3, 2]) + average_precision = AveragePrecision(num_classes=5, average="micro") + with pytest.raises(ValueError, match="Cannot use `micro` average with multi-class input"): + average_precision(pred, target) + + # check that warning is thrown when average=macro and nan is encoutered in individual scores + average_precision = AveragePrecision(num_classes=5, average="macro") + with pytest.warns(UserWarning, match="Average precision score for one or more classes was `nan`.*"): + average_precision(pred, target) diff --git a/RE/paddlemetric/src/tests/classification/test_binned_precision_recall.py b/RE/paddlemetric/src/tests/classification/test_binned_precision_recall.py new file mode 100644 index 00000000..a1ea3376 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_binned_precision_recall.py @@ -0,0 +1,129 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import partial +from typing import Tuple + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import average_precision_score as _sk_average_precision_score +from sklearn.metrics import precision_recall_curve as _sk_precision_recall_curve +from paddleext.torchapi import Tensor + +from tests.classification.inputs import _input_binary_prob +from tests.classification.inputs import _input_binary_prob_plausible as _input_binary_prob_ok +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.classification.inputs import _input_multilabel_prob_plausible as _input_mlb_prob_ok +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, MetricTester +from paddlemetrics.classification.binned_precision_recall import BinnedAveragePrecision, BinnedRecallAtFixedPrecision + +seed_all(42) + + +def recall_at_precision_x_multilabel(predictions: Tensor, targets: Tensor, min_precision: float) -> Tuple[float, float]: + precision, recall, thresholds = _sk_precision_recall_curve(targets, predictions) + + try: + tuple_all = [(r, p, t) for p, r, t in zip(precision, recall, thresholds) if p >= min_precision] + max_recall, _, best_threshold = max(tuple_all) + except ValueError: + max_recall, best_threshold = 0, 1e6 + + return float(max_recall), float(best_threshold) + + +def _sk_prec_recall_mclass_prob(predictions, targets, num_classes, min_precision): + max_recalls = B.zeros(num_classes) + best_thresholds = B.zeros(num_classes) + + for i in range(num_classes): + max_recalls[i], best_thresholds[i] = recall_at_precision_x_multilabel( + predictions[:, i], targets[:, i], min_precision + ) + return max_recalls, best_thresholds + + +def _sk_prec_recall_binary_prob(predictions, targets, num_classes, min_precision): + return recall_at_precision_x_multilabel(predictions, targets, min_precision) + + +def _sk_avg_prec_multiclass(predictions, targets, num_classes): + # replace nan with 0 + return np.nan_to_num(_sk_average_precision_score(targets, predictions, average=None)) + + +@pytest.mark.parametrize( + "preds, target, sk_metric, num_classes", + [ + (_input_binary_prob.preds, _input_binary_prob.target, _sk_prec_recall_binary_prob, 1), + (_input_binary_prob_ok.preds, _input_binary_prob_ok.target, _sk_prec_recall_binary_prob, 1), + (_input_mlb_prob_ok.preds, _input_mlb_prob_ok.target, _sk_prec_recall_mclass_prob, NUM_CLASSES), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_prec_recall_mclass_prob, NUM_CLASSES), + ], +) +class TestBinnedRecallAtPrecision(MetricTester): + atol = 0.02 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("min_precision", [0.05, 0.1, 0.3, 0.5, 0.8, 0.95]) + def test_binned_recall_at_precision( + self, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step, min_precision + ): + # rounding will simulate binning for both implementations + preds = Tensor(np.round(preds.numpy(), 2)) + 1e-6 + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=BinnedRecallAtFixedPrecision, + sk_metric=partial(sk_metric, num_classes=num_classes, min_precision=min_precision), + dist_sync_on_step=dist_sync_on_step, + metric_args={ + "num_classes": num_classes, + "min_precision": min_precision, + "thresholds": 101, + }, + ) + + +@pytest.mark.parametrize( + "preds, 
target, sk_metric, num_classes", + [ + (_input_binary_prob.preds, _input_binary_prob.target, _sk_avg_prec_multiclass, 1), + (_input_binary_prob_ok.preds, _input_binary_prob_ok.target, _sk_avg_prec_multiclass, 1), + (_input_mlb_prob_ok.preds, _input_mlb_prob_ok.target, _sk_avg_prec_multiclass, NUM_CLASSES), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_avg_prec_multiclass, NUM_CLASSES), + ], +) +class TestBinnedAveragePrecision(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("thresholds", (301, B.linspace(0.0, 1.0, 101))) + def test_binned_average_precision(self, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step, thresholds): + # rounding will simulate binning for both implementations + preds = Tensor(np.round(preds.numpy(), 2)) + 1e-6 + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=BinnedAveragePrecision, + sk_metric=partial(sk_metric, num_classes=num_classes), + dist_sync_on_step=dist_sync_on_step, + metric_args={"num_classes": num_classes, "thresholds": thresholds}, + ) diff --git a/RE/paddlemetric/src/tests/classification/test_calibration_error.py b/RE/paddlemetric/src/tests/classification/test_calibration_error.py new file mode 100644 index 00000000..f0a470fc --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_calibration_error.py @@ -0,0 +1,114 @@ +import functools +import re + +import numpy as np +import pytest + +from tests.classification.inputs import _input_binary_prob +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all + +# TODO: replace this with official sklearn implementation after next sklearn release +from tests.helpers.non_sklearn_metrics import calibration_error as sk_calib +from tests.helpers.testers import THRESHOLD, MetricTester +from paddlemetrics import CalibrationError +from paddlemetrics.functional import calibration_error +from paddlemetrics.utilities.checks import _input_format_classification +from paddlemetrics.utilities.enums import DataType + +seed_all(42) + + +def _sk_calibration(preds, target, n_bins, norm, debias=False): + _, _, mode = _input_format_classification(preds, target, threshold=THRESHOLD) + sk_preds, sk_target = preds.numpy(), target.numpy() + + if mode == DataType.MULTICLASS: + # binary label is whether or not the predicted class is correct + sk_target = np.equal(np.argmax(sk_preds, axis=1), sk_target) + sk_preds = np.max(sk_preds, axis=1) + elif mode == DataType.MULTIDIM_MULTICLASS: + # reshape from shape (N, C, ...) to (N*EXTRA_DIMS, C) + sk_preds = np.transpose(sk_preds, axes=(0, 2, 1)) + sk_preds = sk_preds.reshape(np.prod(sk_preds.shape[:-1]), sk_preds.shape[-1]) + # reshape from shape (N, ...) 
to (N*EXTRA_DIMS,) + # binary label is whether or not the predicted class is correct + sk_target = np.equal(np.argmax(sk_preds, axis=1), sk_target.flatten()) + sk_preds = np.max(sk_preds, axis=1) + return sk_calib(y_true=sk_target, y_prob=sk_preds, norm=norm, n_bins=n_bins, reduce_bias=debias) + + +@pytest.mark.parametrize("n_bins", [10, 15, 20]) +@pytest.mark.parametrize("norm", ["l1", "l2", "max"]) +@pytest.mark.parametrize( + "preds, target", + [ + (_input_binary_prob.preds, _input_binary_prob.target), + (_input_mcls_prob.preds, _input_mcls_prob.target), + (_input_mdmc_prob.preds, _input_mdmc_prob.target), + ], +) +class TestCE(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_ce(self, preds, target, n_bins, ddp, dist_sync_on_step, norm): + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=CalibrationError, + sk_metric=functools.partial(_sk_calibration, n_bins=n_bins, norm=norm), + dist_sync_on_step=dist_sync_on_step, + metric_args={"n_bins": n_bins, "norm": norm}, + ) + + def test_ce_functional(self, preds, target, n_bins, norm): + self.run_functional_metric_test( + preds, + target, + metric_functional=calibration_error, + sk_metric=functools.partial(_sk_calibration, n_bins=n_bins, norm=norm), + metric_args={"n_bins": n_bins, "norm": norm}, + ) + + +@pytest.mark.parametrize("preds, targets", [(_input_mlb_prob.preds, _input_mlb_prob.target)]) +def test_invalid_input(preds, targets): + for p, t in zip(preds, targets): + with pytest.raises( + ValueError, + match=re.escape( + f"Calibration error is not well-defined for data with size {p.size()} and targets {t.size()}." + ), + ): + calibration_error(p, t) + + +@pytest.mark.parametrize( + "preds, target", + [ + (_input_binary_prob.preds, _input_binary_prob.target), + (_input_mcls_prob.preds, _input_mcls_prob.target), + (_input_mdmc_prob.preds, _input_mdmc_prob.target), + ], +) +def test_invalid_norm(preds, target): + with pytest.raises(ValueError, match="Norm l3 is not supported. Please select from l1, l2, or max. 
"): + calibration_error(preds, target, norm="l3") + + +@pytest.mark.parametrize("n_bins", [-10, -1, "fsd"]) +@pytest.mark.parametrize( + "preds, targets", + [ + (_input_binary_prob.preds, _input_binary_prob.target), + (_input_mcls_prob.preds, _input_mcls_prob.target), + (_input_mdmc_prob.preds, _input_mdmc_prob.target), + ], +) +def test_invalid_bins(preds, targets, n_bins): + for p, t in zip(preds, targets): + with pytest.raises(ValueError, match=f"Expected argument `n_bins` to be a int larger than 0 but got {n_bins}"): + calibration_error(p, t, n_bins=n_bins) diff --git a/RE/paddlemetric/src/tests/classification/test_cohen_kappa.py b/RE/paddlemetric/src/tests/classification/test_cohen_kappa.py new file mode 100644 index 00000000..d79cc8d8 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_cohen_kappa.py @@ -0,0 +1,133 @@ +from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import cohen_kappa_score as sk_cohen_kappa + +from tests.classification.inputs import _input_binary, _input_binary_prob +from tests.classification.inputs import _input_multiclass as _input_mcls +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mlb +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, THRESHOLD, MetricTester +from paddlemetrics.classification.cohen_kappa import CohenKappa +from paddlemetrics.functional.classification.cohen_kappa import cohen_kappa + +seed_all(42) + + +def _sk_cohen_kappa_binary_prob(preds, target, weights=None): + sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) + sk_target = target.view(-1).numpy() + + return sk_cohen_kappa(y1=sk_target, y2=sk_preds, weights=weights) + + +def _sk_cohen_kappa_binary(preds, target, weights=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_cohen_kappa(y1=sk_target, y2=sk_preds, weights=weights) + + +def _sk_cohen_kappa_multilabel_prob(preds, target, weights=None): + sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) + sk_target = target.view(-1).numpy() + + return sk_cohen_kappa(y1=sk_target, y2=sk_preds, weights=weights) + + +def _sk_cohen_kappa_multilabel(preds, target, weights=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_cohen_kappa(y1=sk_target, y2=sk_preds, weights=weights) + + +def _sk_cohen_kappa_multiclass_prob(preds, target, weights=None): + sk_preds = B.argmax(preds, dim=len(preds.shape) - 1).view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_cohen_kappa(y1=sk_target, y2=sk_preds, weights=weights) + + +def _sk_cohen_kappa_multiclass(preds, target, weights=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_cohen_kappa(y1=sk_target, y2=sk_preds, weights=weights) + + +def _sk_cohen_kappa_multidim_multiclass_prob(preds, target, weights=None): + sk_preds = B.argmax(preds, dim=len(preds.shape) - 2).view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_cohen_kappa(y1=sk_target, y2=sk_preds, weights=weights) + + +def _sk_cohen_kappa_multidim_multiclass(preds, target, weights=None): + 
sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_cohen_kappa(y1=sk_target, y2=sk_preds, weights=weights) + + +@pytest.mark.parametrize("weights", ["linear", "quadratic", None]) +@pytest.mark.parametrize( + "preds, target, sk_metric, num_classes", + [ + (_input_binary_prob.preds, _input_binary_prob.target, _sk_cohen_kappa_binary_prob, 2), + (_input_binary.preds, _input_binary.target, _sk_cohen_kappa_binary, 2), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_cohen_kappa_multilabel_prob, 2), + (_input_mlb.preds, _input_mlb.target, _sk_cohen_kappa_multilabel, 2), + (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_cohen_kappa_multiclass_prob, NUM_CLASSES), + (_input_mcls.preds, _input_mcls.target, _sk_cohen_kappa_multiclass, NUM_CLASSES), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_cohen_kappa_multidim_multiclass_prob, NUM_CLASSES), + (_input_mdmc.preds, _input_mdmc.target, _sk_cohen_kappa_multidim_multiclass, NUM_CLASSES), + ], +) +class TestCohenKappa(MetricTester): + atol = 1e-5 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_cohen_kappa(self, weights, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=CohenKappa, + sk_metric=partial(sk_metric, weights=weights), + dist_sync_on_step=dist_sync_on_step, + metric_args={"num_classes": num_classes, "threshold": THRESHOLD, "weights": weights}, + ) + + def test_cohen_kappa_functional(self, weights, preds, target, sk_metric, num_classes): + self.run_functional_metric_test( + preds, + target, + metric_functional=cohen_kappa, + sk_metric=partial(sk_metric, weights=weights), + metric_args={"num_classes": num_classes, "threshold": THRESHOLD, "weights": weights}, + ) + + def test_cohen_kappa_differentiability(self, preds, target, sk_metric, weights, num_classes): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=CohenKappa, + metric_functional=cohen_kappa, + metric_args={"num_classes": num_classes, "threshold": THRESHOLD, "weights": weights}, + ) + + +def test_warning_on_wrong_weights(tmpdir): + preds = B.randint(3, size=(20,)) + target = B.randint(3, size=(20,)) + + with pytest.raises(ValueError, match=".* ``weights`` but should be either None, 'linear' or 'quadratic'"): + cohen_kappa(preds, target, num_classes=3, weights="unknown_arg") diff --git a/RE/paddlemetric/src/tests/classification/test_confusion_matrix.py b/RE/paddlemetric/src/tests/classification/test_confusion_matrix.py new file mode 100644 index 00000000..9ae6fa81 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_confusion_matrix.py @@ -0,0 +1,188 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
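+# `normalize` follows the sklearn convention: "true" normalizes each row (over the true
+# class), "pred" each column (over the predicted class), and "all" the whole matrix.
+# sklearn's `multilabel_confusion_matrix` has no such argument, so the multilabel
+# reference wrappers below apply the normalization manually.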
+from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import confusion_matrix as sk_confusion_matrix +from sklearn.metrics import multilabel_confusion_matrix as sk_multilabel_confusion_matrix + +from tests.classification.inputs import _input_binary, _input_binary_logits, _input_binary_prob +from tests.classification.inputs import _input_multiclass as _input_mcls +from tests.classification.inputs import _input_multiclass_logits as _input_mcls_logits +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mlb +from tests.classification.inputs import _input_multilabel_logits as _input_mlb_logits +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, THRESHOLD, MetricTester +from paddlemetrics.classification.confusion_matrix import ConfusionMatrix +from paddlemetrics.functional import confusion_matrix + +seed_all(42) + + +def _sk_cm_binary_prob(preds, target, normalize=None): + sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) + sk_target = target.view(-1).numpy() + + return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) + + +def _sk_cm_binary(preds, target, normalize=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) + + +def _sk_cm_multilabel_prob(preds, target, normalize=None): + sk_preds = (preds.numpy() >= THRESHOLD).astype(np.uint8) + sk_target = target.numpy() + + cm = sk_multilabel_confusion_matrix(y_true=sk_target, y_pred=sk_preds) + if normalize is not None: + if normalize == "true": + cm = cm / cm.sum(axis=1, keepdims=True) + elif normalize == "pred": + cm = cm / cm.sum(axis=0, keepdims=True) + elif normalize == "all": + cm = cm / cm.sum() + cm[np.isnan(cm)] = 0 + return cm + + +def _sk_cm_multilabel(preds, target, normalize=None): + sk_preds = preds.numpy() + sk_target = target.numpy() + + cm = sk_multilabel_confusion_matrix(y_true=sk_target, y_pred=sk_preds) + if normalize is not None: + if normalize == "true": + cm = cm / cm.sum(axis=1, keepdims=True) + elif normalize == "pred": + cm = cm / cm.sum(axis=0, keepdims=True) + elif normalize == "all": + cm = cm / cm.sum() + cm[np.isnan(cm)] = 0 + return cm + + +def _sk_cm_multiclass_prob(preds, target, normalize=None): + sk_preds = B.argmax(preds, dim=len(preds.shape) - 1).view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) + + +def _sk_cm_multiclass(preds, target, normalize=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) + + +def _sk_cm_multidim_multiclass_prob(preds, target, normalize=None): + sk_preds = B.argmax(preds, dim=len(preds.shape) - 2).view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) + + +def _sk_cm_multidim_multiclass(preds, target, normalize=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() 
+ + return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) + + +@pytest.mark.parametrize("normalize", ["true", "pred", "all", None]) +@pytest.mark.parametrize( + "preds, target, sk_metric, num_classes, multilabel", + [ + (_input_binary_prob.preds, _input_binary_prob.target, _sk_cm_binary_prob, 2, False), + (_input_binary_logits.preds, _input_binary_logits.target, _sk_cm_binary_prob, 2, False), + (_input_binary.preds, _input_binary.target, _sk_cm_binary, 2, False), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_cm_multilabel_prob, NUM_CLASSES, True), + (_input_mlb_logits.preds, _input_mlb_logits.target, _sk_cm_multilabel_prob, NUM_CLASSES, True), + (_input_mlb.preds, _input_mlb.target, _sk_cm_multilabel, NUM_CLASSES, True), + (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_cm_multiclass_prob, NUM_CLASSES, False), + (_input_mcls_logits.preds, _input_mcls_logits.target, _sk_cm_multiclass_prob, NUM_CLASSES, False), + (_input_mcls.preds, _input_mcls.target, _sk_cm_multiclass, NUM_CLASSES, False), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_cm_multidim_multiclass_prob, NUM_CLASSES, False), + (_input_mdmc.preds, _input_mdmc.target, _sk_cm_multidim_multiclass, NUM_CLASSES, False), + ], +) +class TestConfusionMatrix(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_confusion_matrix( + self, normalize, preds, target, sk_metric, num_classes, multilabel, ddp, dist_sync_on_step + ): + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=ConfusionMatrix, + sk_metric=partial(sk_metric, normalize=normalize), + dist_sync_on_step=dist_sync_on_step, + metric_args={ + "num_classes": num_classes, + "threshold": THRESHOLD, + "normalize": normalize, + "multilabel": multilabel, + }, + ) + + def test_confusion_matrix_functional(self, normalize, preds, target, sk_metric, num_classes, multilabel): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=confusion_matrix, + sk_metric=partial(sk_metric, normalize=normalize), + metric_args={ + "num_classes": num_classes, + "threshold": THRESHOLD, + "normalize": normalize, + "multilabel": multilabel, + }, + ) + + def test_confusion_matrix_differentiability(self, normalize, preds, target, sk_metric, num_classes, multilabel): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=ConfusionMatrix, + metric_functional=confusion_matrix, + metric_args={ + "num_classes": num_classes, + "threshold": THRESHOLD, + "normalize": normalize, + "multilabel": multilabel, + }, + ) + + +def test_warning_on_nan(tmpdir): + preds = B.randint(3, size=(20,)) + target = B.randint(3, size=(20,)) + + with pytest.warns( + UserWarning, + match=".* nan values found in confusion matrix have been replaced with zeros.", + ): + confusion_matrix(preds, target, num_classes=5, normalize="true") diff --git a/RE/paddlemetric/src/tests/classification/test_f_beta.py b/RE/paddlemetric/src/tests/classification/test_f_beta.py new file mode 100644 index 00000000..741c0d46 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_f_beta.py @@ -0,0 +1,451 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from functools import partial +from typing import Callable, Optional + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import f1_score, fbeta_score +from paddleext.torchapi import Tensor + +from tests.classification.inputs import _input_binary, _input_binary_logits, _input_binary_prob +from tests.classification.inputs import _input_multiclass as _input_mcls +from tests.classification.inputs import _input_multiclass_logits as _input_mcls_logits +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multiclass_with_missing_class as _input_miss_class +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mlb +from tests.classification.inputs import _input_multilabel_logits as _input_mlb_logits +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_BATCHES, NUM_CLASSES, THRESHOLD, MetricTester +from paddlemetrics import F1, FBeta, Metric +from paddlemetrics.functional import f1, fbeta +from paddlemetrics.utilities.checks import _input_format_classification +from paddlemetrics.utilities.enums import AverageMethod + +seed_all(42) + + +def _sk_fbeta_f1(preds, target, sk_fn, num_classes, average, multiclass, ignore_index, mdmc_average=None): + if average == "none": + average = None + if num_classes == 1: + average = "binary" + + labels = list(range(num_classes)) + try: + labels.remove(ignore_index) + except ValueError: + pass + + sk_preds, sk_target, _ = _input_format_classification( + preds, target, THRESHOLD, num_classes=num_classes, multiclass=multiclass + ) + sk_preds, sk_target = sk_preds.numpy(), sk_target.numpy() + sk_scores = sk_fn(sk_target, sk_preds, average=average, zero_division=0, labels=labels) + + if len(labels) != num_classes and not average: + sk_scores = np.insert(sk_scores, ignore_index, np.nan) + + return sk_scores + + +def _sk_fbeta_f1_multidim_multiclass( + preds, target, sk_fn, num_classes, average, multiclass, ignore_index, mdmc_average +): + preds, target, _ = _input_format_classification( + preds, target, threshold=THRESHOLD, num_classes=num_classes, multiclass=multiclass + ) + + if mdmc_average == "global": + preds = B.transpose(preds, 1, 2).reshape(-1, preds.shape[1]) + target = B.transpose(target, 1, 2).reshape(-1, target.shape[1]) + + return _sk_fbeta_f1(preds, target, sk_fn, num_classes, average, False, ignore_index) + if mdmc_average == "samplewise": + scores = [] + + for i in range(preds.shape[0]): + pred_i = preds[i, ...].T + target_i = target[i, ...].T + scores_i = _sk_fbeta_f1(pred_i, target_i, sk_fn, num_classes, average, False, ignore_index) + + scores.append(np.expand_dims(scores_i, 0)) + + return np.concatenate(scores).mean(axis=0) + + +@pytest.mark.parametrize( + "metric_class, metric_fn", + [ + (partial(FBeta, beta=2.0), partial(fbeta, 
beta=2.0)), + (F1, f1), + ], +) +@pytest.mark.parametrize( + "average, mdmc_average, num_classes, ignore_index, match_str", + [ + ("wrong", None, None, None, "`average`"), + ("micro", "wrong", None, None, "`mdmc"), + ("macro", None, None, None, "number of classes"), + ("macro", None, 1, 0, "ignore_index"), + ], +) +def test_wrong_params(metric_class, metric_fn, average, mdmc_average, num_classes, ignore_index, match_str): + with pytest.raises(ValueError, match=match_str): + metric_class( + average=average, + mdmc_average=mdmc_average, + num_classes=num_classes, + ignore_index=ignore_index, + ) + + with pytest.raises(ValueError, match=match_str): + metric_fn( + _input_binary.preds[0], + _input_binary.target[0], + average=average, + mdmc_average=mdmc_average, + num_classes=num_classes, + ignore_index=ignore_index, + ) + + +@pytest.mark.parametrize( + "metric_class, metric_fn", + [ + (partial(FBeta, beta=2.0), partial(fbeta, beta=2.0)), + (F1, f1), + ], +) +def test_zero_division(metric_class, metric_fn): + """Test that zero_division works correctly (currently should just set to 0).""" + + preds = B.tensor([1, 2, 1, 1]) + target = B.tensor([2, 0, 2, 1]) + + cl_metric = metric_class(average="none", num_classes=3) + cl_metric(preds, target) + + result_cl = cl_metric.compute() + result_fn = metric_fn(preds, target, average="none", num_classes=3) + + assert result_cl[0] == result_fn[0] == 0 + + +@pytest.mark.parametrize( + "metric_class, metric_fn", + [ + (partial(FBeta, beta=2.0), partial(fbeta, beta=2.0)), + (F1, f1), + ], +) +def test_no_support(metric_class, metric_fn): + """This tests a rare edge case, where there is only one class present. + + in target, and ignore_index is set to exactly that class - and the + average method is equal to 'weighted'. + + This would mean that the sum of weights equals zero, and would, without + taking care of this case, return NaN. However, the reduction function + should catch that and set the metric to equal the value of zero_division + in this case (zero_division is for now not configurable and equals 0). 
+ """ + + preds = B.tensor([1, 1, 0, 0]) + target = B.tensor([0, 0, 0, 0]) + + cl_metric = metric_class(average="weighted", num_classes=2, ignore_index=0) + cl_metric(preds, target) + + result_cl = cl_metric.compute() + result_fn = metric_fn(preds, target, average="weighted", num_classes=2, ignore_index=0) + + assert result_cl == result_fn == 0 + + +@pytest.mark.parametrize("metric_class, metric_fn", [(partial(FBeta, beta=2.0), partial(fbeta, beta=2.0)), (F1, f1)]) +@pytest.mark.parametrize( + "ignore_index, expected", [(None, B.tensor([1.0, np.nan])), (0, B.tensor([np.nan, np.nan]))] +) +def test_class_not_present(metric_class, metric_fn, ignore_index, expected): + """This tests that when metric is computed per class and a given class is not present in both the `preds` and + `target`, the resulting score is `nan`.""" + preds = B.tensor([0, 0, 0]) + target = B.tensor([0, 0, 0]) + num_classes = 2 + + # test functional + result_fn = metric_fn(preds, target, average=AverageMethod.NONE, num_classes=num_classes, ignore_index=ignore_index) + assert B.allclose(expected, result_fn, equal_nan=True) + + # test class + cl_metric = metric_class(average=AverageMethod.NONE, num_classes=num_classes, ignore_index=ignore_index) + cl_metric(preds, target) + result_cl = cl_metric.compute() + assert B.allclose(expected, result_cl, equal_nan=True) + + +@pytest.mark.parametrize( + "metric_class, metric_fn, sk_fn", + [(partial(FBeta, beta=2.0), partial(fbeta, beta=2.0), partial(fbeta_score, beta=2.0)), (F1, f1, f1_score)], +) +@pytest.mark.parametrize("average", ["micro", "macro", None, "weighted", "samples"]) +@pytest.mark.parametrize("ignore_index", [None, 0]) +@pytest.mark.parametrize( + "preds, target, num_classes, multiclass, mdmc_average, sk_wrapper", + [ + (_input_binary_logits.preds, _input_binary_logits.target, 1, None, None, _sk_fbeta_f1), + (_input_binary_prob.preds, _input_binary_prob.target, 1, None, None, _sk_fbeta_f1), + (_input_binary.preds, _input_binary.target, 1, False, None, _sk_fbeta_f1), + (_input_mlb_logits.preds, _input_mlb_logits.target, NUM_CLASSES, None, None, _sk_fbeta_f1), + (_input_mlb_prob.preds, _input_mlb_prob.target, NUM_CLASSES, None, None, _sk_fbeta_f1), + (_input_mlb.preds, _input_mlb.target, NUM_CLASSES, False, None, _sk_fbeta_f1), + (_input_mcls_logits.preds, _input_mcls_logits.target, NUM_CLASSES, None, None, _sk_fbeta_f1), + (_input_mcls_prob.preds, _input_mcls_prob.target, NUM_CLASSES, None, None, _sk_fbeta_f1), + (_input_mcls.preds, _input_mcls.target, NUM_CLASSES, None, None, _sk_fbeta_f1), + (_input_mdmc.preds, _input_mdmc.target, NUM_CLASSES, None, "global", _sk_fbeta_f1_multidim_multiclass), + ( + _input_mdmc_prob.preds, + _input_mdmc_prob.target, + NUM_CLASSES, + None, + "global", + _sk_fbeta_f1_multidim_multiclass, + ), + (_input_mdmc.preds, _input_mdmc.target, NUM_CLASSES, None, "samplewise", _sk_fbeta_f1_multidim_multiclass), + ( + _input_mdmc_prob.preds, + _input_mdmc_prob.target, + NUM_CLASSES, + None, + "samplewise", + _sk_fbeta_f1_multidim_multiclass, + ), + ], +) +class TestFBeta(MetricTester): + @pytest.mark.parametrize("ddp", [False]) + @pytest.mark.parametrize("dist_sync_on_step", [False]) + def test_fbeta_f1( + self, + ddp: bool, + dist_sync_on_step: bool, + preds: Tensor, + target: Tensor, + sk_wrapper: Callable, + metric_class: Metric, + metric_fn: Callable, + sk_fn: Callable, + multiclass: Optional[bool], + num_classes: Optional[int], + average: str, + mdmc_average: Optional[str], + ignore_index: Optional[int], + ): + if num_classes == 1 and average 
!= "micro": + pytest.skip("Only test binary data for 'micro' avg (equivalent of 'binary' in sklearn)") + + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + if average == "weighted" and ignore_index is not None and mdmc_average is not None: + pytest.skip("Ignore special case where we are ignoring entire sample for 'weighted' average") + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=metric_class, + sk_metric=partial( + sk_wrapper, + sk_fn=sk_fn, + average=average, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + mdmc_average=mdmc_average, + ), + dist_sync_on_step=dist_sync_on_step, + metric_args={ + "num_classes": num_classes, + "average": average, + "threshold": THRESHOLD, + "multiclass": multiclass, + "ignore_index": ignore_index, + "mdmc_average": mdmc_average, + }, + check_dist_sync_on_step=True, + check_batch=True, + ) + + def test_fbeta_f1_functional( + self, + preds: Tensor, + target: Tensor, + sk_wrapper: Callable, + metric_class: Metric, + metric_fn: Callable, + sk_fn: Callable, + multiclass: Optional[bool], + num_classes: Optional[int], + average: str, + mdmc_average: Optional[str], + ignore_index: Optional[int], + ): + if num_classes == 1 and average != "micro": + pytest.skip("Only test binary data for 'micro' avg (equivalent of 'binary' in sklearn)") + + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + if average == "weighted" and ignore_index is not None and mdmc_average is not None: + pytest.skip("Ignore special case where we are ignoring entire sample for 'weighted' average") + + self.run_functional_metric_test( + preds, + target, + metric_functional=metric_fn, + sk_metric=partial( + sk_wrapper, + sk_fn=sk_fn, + average=average, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + mdmc_average=mdmc_average, + ), + metric_args={ + "num_classes": num_classes, + "average": average, + "threshold": THRESHOLD, + "multiclass": multiclass, + "ignore_index": ignore_index, + "mdmc_average": mdmc_average, + }, + ) + + def test_fbeta_f1_differentiability( + self, + preds: Tensor, + target: Tensor, + sk_wrapper: Callable, + metric_class: Metric, + metric_fn: Callable, + sk_fn: Callable, + multiclass: Optional[bool], + num_classes: Optional[int], + average: str, + mdmc_average: Optional[str], + ignore_index: Optional[int], + ): + if num_classes == 1 and average != "micro": + pytest.skip("Only test binary data for 'micro' avg (equivalent of 'binary' in sklearn)") + + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + if average == "weighted" and ignore_index is not None and mdmc_average is not None: + pytest.skip("Ignore special case where we are ignoring entire sample for 'weighted' average") + + self.run_differentiability_test( + preds, + target, + metric_functional=metric_fn, + metric_module=metric_class, + metric_args={ + "num_classes": num_classes, + "average": average, + "threshold": THRESHOLD, + "multiclass": multiclass, + "ignore_index": ignore_index, + "mdmc_average": mdmc_average, + }, + ) + + +_mc_k_target = B.tensor([0, 1, 2]) +_mc_k_preds = B.tensor([[0.35, 0.4, 0.25], [0.1, 0.5, 0.4], [0.2, 0.1, 0.7]]) +_ml_k_target = B.tensor([[0, 1, 0], [1, 1, 0], [0, 0, 0]]) +_ml_k_preds = B.tensor([[0.9, 0.2, 0.75], [0.1, 0.7, 0.8], [0.6, 0.1, 0.7]]) + + +@pytest.mark.parametrize( + 
"metric_class, metric_fn", + [ + (partial(FBeta, beta=2.0), partial(fbeta, beta=2.0)), + (F1, fbeta), + ], +) +@pytest.mark.parametrize( + "k, preds, target, average, expected_fbeta, expected_f1", + [ + (1, _mc_k_preds, _mc_k_target, "micro", B.tensor(2 / 3), B.tensor(2 / 3)), + (2, _mc_k_preds, _mc_k_target, "micro", B.tensor(5 / 6), B.tensor(2 / 3)), + (1, _ml_k_preds, _ml_k_target, "micro", B.tensor(0.0), B.tensor(0.0)), + (2, _ml_k_preds, _ml_k_target, "micro", B.tensor(5 / 18), B.tensor(2 / 9)), + ], +) +def test_top_k( + metric_class, + metric_fn, + k: int, + preds: Tensor, + target: Tensor, + average: str, + expected_fbeta: Tensor, + expected_f1: Tensor, +): + """A simple test to check that top_k works as expected. + + Just a sanity check, the tests in StatScores should already guarantee the corectness of results. + """ + class_metric = metric_class(top_k=k, average=average, num_classes=3) + class_metric.update(preds, target) + + if class_metric.beta != 1.0: + result = expected_fbeta + else: + result = expected_f1 + + assert B.isclose(class_metric.compute(), result) + assert B.isclose(metric_fn(preds, target, top_k=k, average=average, num_classes=3), result) + + +@pytest.mark.parametrize("ignore_index", [None, 2]) +@pytest.mark.parametrize("average", ["micro", "macro", "weighted"]) +@pytest.mark.parametrize( + "metric_class, metric_functional, sk_fn", + [(partial(FBeta, beta=2.0), partial(fbeta, beta=2.0), partial(fbeta_score, beta=2.0)), (F1, f1, f1_score)], +) +def test_same_input(metric_class, metric_functional, sk_fn, average, ignore_index): + preds = _input_miss_class.preds + target = _input_miss_class.target + preds_flat = B.cat(list(preds), dim=0) + target_flat = B.cat(list(target), dim=0) + + mc = metric_class(num_classes=NUM_CLASSES, average=average, ignore_index=ignore_index) + for i in range(NUM_BATCHES): + mc.update(preds[i], target[i]) + class_res = mc.compute() + func_res = metric_functional( + preds_flat, target_flat, num_classes=NUM_CLASSES, average=average, ignore_index=ignore_index + ) + sk_res = sk_fn(target_flat, preds_flat, average=average, zero_division=0) + + assert B.allclose(class_res, B.tensor(sk_res).float()) + assert B.allclose(func_res, B.tensor(sk_res).float()) diff --git a/RE/paddlemetric/src/tests/classification/test_hamming_distance.py b/RE/paddlemetric/src/tests/classification/test_hamming_distance.py new file mode 100644 index 00000000..a1ca480b --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_hamming_distance.py @@ -0,0 +1,106 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import pytest +from sklearn.metrics import hamming_loss as sk_hamming_loss + +from tests.classification.inputs import _input_binary, _input_binary_logits, _input_binary_prob +from tests.classification.inputs import _input_multiclass as _input_mcls +from tests.classification.inputs import _input_multiclass_logits as _input_mcls_logits +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mlb +from tests.classification.inputs import _input_multilabel_logits as _input_mlb_logits +from tests.classification.inputs import _input_multilabel_multidim as _input_mlmd +from tests.classification.inputs import _input_multilabel_multidim_prob as _input_mlmd_prob +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import THRESHOLD, MetricTester +from paddlemetrics import HammingDistance +from paddlemetrics.functional import hamming_distance +from paddlemetrics.utilities.checks import _input_format_classification + +seed_all(42) + + +def _sk_hamming_loss(preds, target): + sk_preds, sk_target, _ = _input_format_classification(preds, target, threshold=THRESHOLD) + sk_preds, sk_target = sk_preds.numpy(), sk_target.numpy() + sk_preds, sk_target = sk_preds.reshape(sk_preds.shape[0], -1), sk_target.reshape(sk_target.shape[0], -1) + + return sk_hamming_loss(y_true=sk_target, y_pred=sk_preds) + + +@pytest.mark.parametrize( + "preds, target", + [ + (_input_binary_logits.preds, _input_binary_logits.target), + (_input_binary_prob.preds, _input_binary_prob.target), + (_input_binary.preds, _input_binary.target), + (_input_mlb_logits.preds, _input_mlb_logits.target), + (_input_mlb_prob.preds, _input_mlb_prob.target), + (_input_mlb.preds, _input_mlb.target), + (_input_mcls_logits.preds, _input_mcls_logits.target), + (_input_mcls_prob.preds, _input_mcls_prob.target), + (_input_mcls.preds, _input_mcls.target), + (_input_mdmc_prob.preds, _input_mdmc_prob.target), + (_input_mdmc.preds, _input_mdmc.target), + (_input_mlmd_prob.preds, _input_mlmd_prob.target), + (_input_mlmd.preds, _input_mlmd.target), + ], +) +class TestHammingDistance(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [False, True]) + def test_hamming_distance_class(self, ddp, dist_sync_on_step, preds, target): + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=HammingDistance, + sk_metric=_sk_hamming_loss, + dist_sync_on_step=dist_sync_on_step, + metric_args={"threshold": THRESHOLD}, + ) + + def test_hamming_distance_fn(self, preds, target): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=hamming_distance, + sk_metric=_sk_hamming_loss, + metric_args={"threshold": THRESHOLD}, + ) + + def test_hamming_distance_differentiability(self, preds, target): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=HammingDistance, + metric_functional=hamming_distance, + metric_args={"threshold": THRESHOLD}, + ) + + +@pytest.mark.parametrize("threshold", [1.5]) +def test_wrong_params(threshold): + preds, target = _input_mcls_prob.preds, _input_mcls_prob.target + + with pytest.raises(ValueError): + ham_dist = HammingDistance(threshold=threshold) 
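+ # a threshold outside the [0, 1] range (here 1.5) is expected to raise a ValueError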
+ ham_dist(preds, target) + ham_dist.compute() + + with pytest.raises(ValueError): + hamming_distance(preds, target, threshold=threshold) diff --git a/RE/paddlemetric/src/tests/classification/test_hinge.py b/RE/paddlemetric/src/tests/classification/test_hinge.py new file mode 100644 index 00000000..7adbbb78 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_hinge.py @@ -0,0 +1,156 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import hinge_loss as sk_hinge +from sklearn.preprocessing import OneHotEncoder + +from tests.classification.inputs import Input +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, NUM_CLASSES, MetricTester +from paddlemetrics import Hinge +from paddlemetrics.functional import hinge +from paddlemetrics.functional.classification.hinge import MulticlassMode + +B.manual_seed(42) + +_input_binary = Input( + preds=B.randn(NUM_BATCHES, BATCH_SIZE), target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE)) +) + +_input_binary_single = Input(preds=B.randn((NUM_BATCHES, 1)), target=B.randint(high=2, size=(NUM_BATCHES, 1))) + +_input_multiclass = Input( + preds=B.randn(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES), + target=B.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), +) + + +def _sk_hinge(preds, target, squared, multiclass_mode): + sk_preds, sk_target = preds.numpy(), target.numpy() + + if multiclass_mode == MulticlassMode.ONE_VS_ALL: + enc = OneHotEncoder() + enc.fit(sk_target.reshape(-1, 1)) + sk_target = enc.transform(sk_target.reshape(-1, 1)).toarray() + + if sk_preds.ndim == 1 or multiclass_mode == MulticlassMode.ONE_VS_ALL: + sk_target = 2 * sk_target - 1 + + if squared or sk_target.max() != 1 or sk_target.min() != -1: + # Squared not an option in sklearn and infers classes incorrectly with single element, so adapted from source + if sk_preds.ndim == 1 or multiclass_mode == MulticlassMode.ONE_VS_ALL: + margin = sk_target * sk_preds + else: + mask = np.ones_like(sk_preds, dtype=bool) + mask[np.arange(sk_target.shape[0]), sk_target] = False + margin = sk_preds[~mask] + margin -= np.max(sk_preds[mask].reshape(sk_target.shape[0], -1), axis=1) + measures = 1 - margin + measures = np.clip(measures, 0, None) + + if squared: + measures = measures ** 2 + return measures.mean(axis=0) + if multiclass_mode == MulticlassMode.ONE_VS_ALL: + result = np.zeros(sk_preds.shape[1]) + for i in range(result.shape[0]): + result[i] = sk_hinge(y_true=sk_target[:, i], pred_decision=sk_preds[:, i]) + return result + + return sk_hinge(y_true=sk_target, pred_decision=sk_preds) + + +@pytest.mark.parametrize( + "preds, target, squared, multiclass_mode", + [ + (_input_binary.preds, _input_binary.target, False, None), + (_input_binary.preds, _input_binary.target, True, None), + (_input_binary_single.preds, _input_binary_single.target, False, None), + (_input_binary_single.preds, _input_binary_single.target, True, 
None), + (_input_multiclass.preds, _input_multiclass.target, False, MulticlassMode.CRAMMER_SINGER), + (_input_multiclass.preds, _input_multiclass.target, True, MulticlassMode.CRAMMER_SINGER), + (_input_multiclass.preds, _input_multiclass.target, False, MulticlassMode.ONE_VS_ALL), + (_input_multiclass.preds, _input_multiclass.target, True, MulticlassMode.ONE_VS_ALL), + ], +) +class TestHinge(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_hinge_class(self, ddp, dist_sync_on_step, preds, target, squared, multiclass_mode): + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=Hinge, + sk_metric=partial(_sk_hinge, squared=squared, multiclass_mode=multiclass_mode), + dist_sync_on_step=dist_sync_on_step, + metric_args={ + "squared": squared, + "multiclass_mode": multiclass_mode, + }, + ) + + def test_hinge_fn(self, preds, target, squared, multiclass_mode): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=partial(hinge, squared=squared, multiclass_mode=multiclass_mode), + sk_metric=partial(_sk_hinge, squared=squared, multiclass_mode=multiclass_mode), + ) + + def test_hinge_differentiability(self, preds, target, squared, multiclass_mode): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=Hinge, + metric_functional=partial(hinge, squared=squared, multiclass_mode=multiclass_mode), + ) + + +_input_multi_target = Input(preds=B.randn(BATCH_SIZE), target=B.randint(high=2, size=(BATCH_SIZE, 2))) + +_input_binary_different_sizes = Input( + preds=B.randn(BATCH_SIZE * 2), target=B.randint(high=2, size=(BATCH_SIZE,)) +) + +_input_multi_different_sizes = Input( + preds=B.randn(BATCH_SIZE * 2, NUM_CLASSES), target=B.randint(high=NUM_CLASSES, size=(BATCH_SIZE,)) +) + +_input_extra_dim = Input( + preds=B.randn(BATCH_SIZE, NUM_CLASSES, 2), target=B.randint(high=2, size=(BATCH_SIZE,)) +) + + +@pytest.mark.parametrize( + "preds, target, multiclass_mode", + [ + (_input_multi_target.preds, _input_multi_target.target, None), + (_input_binary_different_sizes.preds, _input_binary_different_sizes.target, None), + (_input_multi_different_sizes.preds, _input_multi_different_sizes.target, None), + (_input_extra_dim.preds, _input_extra_dim.target, None), + (_input_multiclass.preds[0], _input_multiclass.target[0], "invalid_mode"), + ], +) +def test_bad_inputs_fn(preds, target, multiclass_mode): + with pytest.raises(ValueError): + _ = hinge(preds, target, multiclass_mode=multiclass_mode) + + +def test_bad_inputs_class(): + with pytest.raises(ValueError): + Hinge(multiclass_mode="invalid_mode") diff --git a/RE/paddlemetric/src/tests/classification/test_inputs.py b/RE/paddlemetric/src/tests/classification/test_inputs.py new file mode 100644 index 00000000..4f924af2 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_inputs.py @@ -0,0 +1,312 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import pytest +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, rand, randint, tensor + +from tests.classification.inputs import Input +from tests.classification.inputs import _input_binary as _bin +from tests.classification.inputs import _input_binary_prob as _bin_prob +from tests.classification.inputs import _input_multiclass as _mc +from tests.classification.inputs import _input_multiclass_prob as _mc_prob +from tests.classification.inputs import _input_multidim_multiclass as _mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _mdmc_prob +from tests.classification.inputs import _input_multilabel as _ml +from tests.classification.inputs import _input_multilabel_multidim as _mlmd +from tests.classification.inputs import _input_multilabel_multidim_prob as _mlmd_prob +from tests.classification.inputs import _input_multilabel_prob as _ml_prob +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, EXTRA_DIM, NUM_BATCHES, NUM_CLASSES, THRESHOLD +from paddlemetrics.utilities.checks import _input_format_classification +from paddlemetrics.utilities.data import select_topk, to_onehot +from paddlemetrics.utilities.enums import DataType + +seed_all(42) + +# Some additional inputs to test on +_ml_prob_half = Input(_ml_prob.preds.half(), _ml_prob.target) + +_mc_prob_2cls_preds = rand(NUM_BATCHES, BATCH_SIZE, 2) +_mc_prob_2cls_preds /= _mc_prob_2cls_preds.sum(dim=2, keepdim=True) +_mc_prob_2cls = Input(_mc_prob_2cls_preds, randint(high=2, size=(NUM_BATCHES, BATCH_SIZE))) + +_mdmc_prob_many_dims_preds = rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, EXTRA_DIM, EXTRA_DIM) +_mdmc_prob_many_dims_preds /= _mdmc_prob_many_dims_preds.sum(dim=2, keepdim=True) +_mdmc_prob_many_dims = Input( + _mdmc_prob_many_dims_preds, + randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM, EXTRA_DIM)), +) + +_mdmc_prob_2cls_preds = rand(NUM_BATCHES, BATCH_SIZE, 2, EXTRA_DIM) +_mdmc_prob_2cls_preds /= _mdmc_prob_2cls_preds.sum(dim=2, keepdim=True) +_mdmc_prob_2cls = Input(_mdmc_prob_2cls_preds, randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM))) + +# Some utils +T = Tensor + + +def _idn(x): + return x + + +def _usq(x): + return x.unsqueeze(-1) + + +def _thrs(x): + return x >= THRESHOLD + + +def _rshp1(x): + return x.reshape(x.shape[0], -1) + + +def _rshp2(x): + return x.reshape(x.shape[0], x.shape[1], -1) + + +def _onehot(x): + return to_onehot(x, NUM_CLASSES) + + +def _onehot2(x): + return to_onehot(x, 2) + + +def _top1(x): + return select_topk(x, 1) + + +def _top2(x): + return select_topk(x, 2) + + +# To avoid ugly black line wrapping +def _ml_preds_tr(x): + return _rshp1(_thrs(x)) + + +def _onehot_rshp1(x): + return _onehot(_rshp1(x)) + + +def _onehot2_rshp1(x): + return _onehot2(_rshp1(x)) + + +def _top1_rshp2(x): + return _top1(_rshp2(x)) + + +def _top2_rshp2(x): + return _top2(_rshp2(x)) + + +def _probs_to_mc_preds_tr(x): + return _onehot2(_thrs(x)) + + +def _mlmd_prob_to_mc_preds_tr(x): + return _onehot2(_rshp1(_thrs(x))) + + +######################## +# Test correct inputs +######################## + + +@pytest.mark.parametrize( + "inputs, num_classes, multiclass, top_k, exp_mode, post_preds, post_target", + [ + ############################# + # Test usual expected cases + (_bin, None, False, None, "multi-class", _usq, _usq), + (_bin, 1, False, None, "multi-class", _usq, _usq), + (_bin_prob, None, None, None, "binary", lambda x: 
_usq(_thrs(x)), _usq), + (_ml_prob, None, None, None, "multi-label", _thrs, _idn), + (_ml, None, False, None, "multi-dim multi-class", _idn, _idn), + (_ml_prob, None, None, None, "multi-label", _ml_preds_tr, _rshp1), + (_ml_prob, None, None, 2, "multi-label", _top2, _rshp1), + (_mlmd, None, False, None, "multi-dim multi-class", _rshp1, _rshp1), + (_mc, NUM_CLASSES, None, None, "multi-class", _onehot, _onehot), + (_mc_prob, None, None, None, "multi-class", _top1, _onehot), + (_mc_prob, None, None, 2, "multi-class", _top2, _onehot), + (_mdmc, NUM_CLASSES, None, None, "multi-dim multi-class", _onehot, _onehot), + (_mdmc_prob, None, None, None, "multi-dim multi-class", _top1_rshp2, _onehot), + (_mdmc_prob, None, None, 2, "multi-dim multi-class", _top2_rshp2, _onehot), + (_mdmc_prob_many_dims, None, None, None, "multi-dim multi-class", _top1_rshp2, _onehot_rshp1), + (_mdmc_prob_many_dims, None, None, 2, "multi-dim multi-class", _top2_rshp2, _onehot_rshp1), + ########################### + # Test some special cases + # Make sure that half precision works, i.e. is converted to full precision + (_ml_prob_half, None, None, None, "multi-label", lambda x: _ml_preds_tr(x.float()), _rshp1), + # Binary as multiclass + (_bin, None, None, None, "multi-class", _onehot2, _onehot2), + # Binary probs as multiclass + (_bin_prob, None, True, None, "binary", _probs_to_mc_preds_tr, _onehot2), + # Multilabel as multiclass + (_ml, None, True, None, "multi-dim multi-class", _onehot2, _onehot2), + # Multilabel probs as multiclass + (_ml_prob, None, True, None, "multi-label", _probs_to_mc_preds_tr, _onehot2), + # Multidim multilabel as multiclass + (_mlmd, None, True, None, "multi-dim multi-class", _onehot2_rshp1, _onehot2_rshp1), + # Multidim multilabel probs as multiclass + (_mlmd_prob, None, True, None, "multi-label", _mlmd_prob_to_mc_preds_tr, _onehot2_rshp1), + # Multiclass prob with 2 classes as binary + (_mc_prob_2cls, None, False, None, "multi-class", lambda x: _top1(x)[:, [1]], _usq), + # Multi-dim multi-class with 2 classes as multi-label + (_mdmc_prob_2cls, None, False, None, "multi-dim multi-class", lambda x: _top1(x)[:, 1], _idn), + ], +) +def test_usual_cases(inputs, num_classes, multiclass, top_k, exp_mode, post_preds, post_target): + def __get_data_type_enum(str_exp_mode): + return next(DataType[n] for n in dir(DataType) if DataType[n] == str_exp_mode) + + for exp_mode in (exp_mode, __get_data_type_enum(exp_mode)): + preds_out, target_out, mode = _input_format_classification( + preds=inputs.preds[0], + target=inputs.target[0], + threshold=THRESHOLD, + num_classes=num_classes, + multiclass=multiclass, + top_k=top_k, + ) + + assert mode == exp_mode + assert B.equal(preds_out, post_preds(inputs.preds[0]).int()) + assert B.equal(target_out, post_target(inputs.target[0]).int()) + + # Test that things work when batch_size = 1 + preds_out, target_out, mode = _input_format_classification( + preds=inputs.preds[0][[0], ...], + target=inputs.target[0][[0], ...], + threshold=THRESHOLD, + num_classes=num_classes, + multiclass=multiclass, + top_k=top_k, + ) + + assert mode == exp_mode + assert B.equal(preds_out, post_preds(inputs.preds[0][[0], ...]).int()) + assert B.equal(target_out, post_target(inputs.target[0][[0], ...]).int()) + + +# Test that threshold is correctly applied +def test_threshold(): + target = T([1, 1, 1]).int() + preds_probs = T([0.5 - 1e-5, 0.5, 0.5 + 1e-5]) + + preds_probs_out, _, _ = _input_format_classification(preds_probs, target, threshold=0.5) + + assert B.equal(tensor([0, 1, 1], 
dtype=B.int), preds_probs_out.squeeze().int()) + + +######################################################################## +# Test incorrect inputs +######################################################################## + + +@pytest.mark.parametrize( + "preds, target, num_classes, multiclass", + [ + # Target not integer + (randint(high=2, size=(7,)), randint(high=2, size=(7,)).float(), None, None), + # Target negative + (randint(high=2, size=(7,)), -randint(high=2, size=(7,)), None, None), + # Preds negative integers + (-randint(high=2, size=(7,)), randint(high=2, size=(7,)), None, None), + # multiclass=False and target > 1 + (rand(size=(7,)), randint(low=2, high=4, size=(7,)), None, False), + # multiclass=False and preds integers with > 1 + (randint(low=2, high=4, size=(7,)), randint(high=2, size=(7,)), None, False), + # Wrong batch size + (randint(high=2, size=(8,)), randint(high=2, size=(7,)), None, None), + # Completely wrong shape + (randint(high=2, size=(7,)), randint(high=2, size=(7, 4)), None, None), + # Same #dims, different shape + (randint(high=2, size=(7, 3)), randint(high=2, size=(7, 4)), None, None), + # Same shape and preds floats, target not binary + (rand(size=(7, 3)), randint(low=2, high=4, size=(7, 3)), None, None), + # #dims in preds = 1 + #dims in target, C shape not second or last + (rand(size=(7, 3, 4, 3)), randint(high=4, size=(7, 3, 3)), None, None), + # #dims in preds = 1 + #dims in target, preds not float + (randint(high=2, size=(7, 3, 3, 4)), randint(high=4, size=(7, 3, 3)), None, None), + # multiclass=False, with C dimension > 2 + (_mc_prob.preds[0], randint(high=2, size=(BATCH_SIZE,)), None, False), + # Max target larger or equal to C dimension + (_mc_prob.preds[0], randint(low=NUM_CLASSES + 1, high=100, size=(BATCH_SIZE,)), None, None), + # C dimension not equal to num_classes + (_mc_prob.preds[0], _mc_prob.target[0], NUM_CLASSES + 1, None), + # Max target larger than num_classes (with #dim preds = 1 + #dims target) + (_mc_prob.preds[0], randint(low=NUM_CLASSES + 1, high=100, size=(BATCH_SIZE, NUM_CLASSES)), 4, None), + # Max target larger than num_classes (with #dim preds = #dims target) + (randint(high=4, size=(7, 3)), randint(low=5, high=7, size=(7, 3)), 4, None), + # Num_classes=1, but multiclass not false + (randint(high=2, size=(7,)), randint(high=2, size=(7,)), 1, None), + # multiclass=False, but implied class dimension (for multi-label, from shape) != num_classes + (randint(high=2, size=(7, 3, 3)), randint(high=2, size=(7, 3, 3)), 4, False), + # Multilabel input with implied class dimension != num_classes + (rand(size=(7, 3, 3)), randint(high=2, size=(7, 3, 3)), 4, False), + # Multilabel input with multiclass=True, but num_classes != 2 (or None) + (rand(size=(7, 3)), randint(high=2, size=(7, 3)), 4, True), + # Binary input, num_classes > 2 + (rand(size=(7,)), randint(high=2, size=(7,)), 4, None), + # Binary input, num_classes == 2 and multiclass not True + (rand(size=(7,)), randint(high=2, size=(7,)), 2, None), + (rand(size=(7,)), randint(high=2, size=(7,)), 2, False), + # Binary input, num_classes == 1 and multiclass=True + (rand(size=(7,)), randint(high=2, size=(7,)), 1, True), + ], +) +def test_incorrect_inputs(preds, target, num_classes, multiclass): + with pytest.raises(ValueError): + _input_format_classification( + preds=preds, target=target, threshold=THRESHOLD, num_classes=num_classes, multiclass=multiclass + ) + + +@pytest.mark.parametrize( + "preds, target, num_classes, multiclass, top_k", + [ + # Topk set with non (md)mc or ml prob data 
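+        # (labels or binary probabilities, for which a top_k selection is not defined)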
+ (_bin.preds[0], _bin.target[0], None, None, 2), + (_bin_prob.preds[0], _bin_prob.target[0], None, None, 2), + (_mc.preds[0], _mc.target[0], None, None, 2), + (_ml.preds[0], _ml.target[0], None, None, 2), + (_mlmd.preds[0], _mlmd.target[0], None, None, 2), + (_mdmc.preds[0], _mdmc.target[0], None, None, 2), + # top_k = 0 + (_mc_prob_2cls.preds[0], _mc_prob_2cls.target[0], None, None, 0), + # top_k = float + (_mc_prob_2cls.preds[0], _mc_prob_2cls.target[0], None, None, 0.123), + # top_k =2 with 2 classes, multiclass=False + (_mc_prob_2cls.preds[0], _mc_prob_2cls.target[0], None, False, 2), + # top_k = number of classes (C dimension) + (_mc_prob.preds[0], _mc_prob.target[0], None, None, NUM_CLASSES), + # multiclass = True for ml prob inputs, top_k set + (_ml_prob.preds[0], _ml_prob.target[0], None, True, 2), + # top_k = num_classes for ml prob inputs + (_ml_prob.preds[0], _ml_prob.target[0], None, True, NUM_CLASSES), + ], +) +def test_incorrect_inputs_topk(preds, target, num_classes, multiclass, top_k): + with pytest.raises(ValueError): + _input_format_classification( + preds=preds, + target=target, + threshold=THRESHOLD, + num_classes=num_classes, + multiclass=multiclass, + top_k=top_k, + ) diff --git a/RE/paddlemetric/src/tests/classification/test_iou.py b/RE/paddlemetric/src/tests/classification/test_iou.py new file mode 100644 index 00000000..af22d787 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_iou.py @@ -0,0 +1,235 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
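+# IoU (intersection over union) is the Jaccard index, so sklearn's jaccard_score is used as the reference below.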
+from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import jaccard_score as sk_jaccard_score +from paddleext.torchapi import Tensor, tensor + +from tests.classification.inputs import _input_binary, _input_binary_prob +from tests.classification.inputs import _input_multiclass as _input_mcls +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mlb +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers.testers import NUM_CLASSES, THRESHOLD, MetricTester +from paddlemetrics.classification.iou import IoU +from paddlemetrics.functional import iou + + +def _sk_iou_binary_prob(preds, target, average=None): + sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) + sk_target = target.view(-1).numpy() + + return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) + + +def _sk_iou_binary(preds, target, average=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) + + +def _sk_iou_multilabel_prob(preds, target, average=None): + sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) + sk_target = target.view(-1).numpy() + + return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) + + +def _sk_iou_multilabel(preds, target, average=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) + + +def _sk_iou_multiclass_prob(preds, target, average=None): + sk_preds = B.argmax(preds, dim=len(preds.shape) - 1).view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) + + +def _sk_iou_multiclass(preds, target, average=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) + + +def _sk_iou_multidim_multiclass_prob(preds, target, average=None): + sk_preds = B.argmax(preds, dim=len(preds.shape) - 2).view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) + + +def _sk_iou_multidim_multiclass(preds, target, average=None): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) + + +@pytest.mark.parametrize("reduction", ["elementwise_mean", "none"]) +@pytest.mark.parametrize( + "preds, target, sk_metric, num_classes", + [ + (_input_binary_prob.preds, _input_binary_prob.target, _sk_iou_binary_prob, 2), + (_input_binary.preds, _input_binary.target, _sk_iou_binary, 2), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_iou_multilabel_prob, 2), + (_input_mlb.preds, _input_mlb.target, _sk_iou_multilabel, 2), + (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_iou_multiclass_prob, NUM_CLASSES), + (_input_mcls.preds, _input_mcls.target, _sk_iou_multiclass, NUM_CLASSES), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_iou_multidim_multiclass_prob, NUM_CLASSES), + (_input_mdmc.preds, _input_mdmc.target, 
_sk_iou_multidim_multiclass, NUM_CLASSES), + ], +) +class TestIoU(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_iou(self, reduction, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step): + average = "macro" if reduction == "elementwise_mean" else None # convert tags + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=IoU, + sk_metric=partial(sk_metric, average=average), + dist_sync_on_step=dist_sync_on_step, + metric_args={"num_classes": num_classes, "threshold": THRESHOLD, "reduction": reduction}, + ) + + def test_iou_functional(self, reduction, preds, target, sk_metric, num_classes): + average = "macro" if reduction == "elementwise_mean" else None # convert tags + self.run_functional_metric_test( + preds, + target, + metric_functional=iou, + sk_metric=partial(sk_metric, average=average), + metric_args={"num_classes": num_classes, "threshold": THRESHOLD, "reduction": reduction}, + ) + + def test_iou_differentiability(self, reduction, preds, target, sk_metric, num_classes): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=IoU, + metric_functional=iou, + metric_args={"num_classes": num_classes, "threshold": THRESHOLD, "reduction": reduction}, + ) + + +@pytest.mark.parametrize( + ["half_ones", "reduction", "ignore_index", "expected"], + [ + pytest.param(False, "none", None, Tensor([1, 1, 1])), + pytest.param(False, "elementwise_mean", None, Tensor([1])), + pytest.param(False, "none", 0, Tensor([1, 1])), + pytest.param(True, "none", None, Tensor([0.5, 0.5, 0.5])), + pytest.param(True, "elementwise_mean", None, Tensor([0.5])), + pytest.param(True, "none", 0, Tensor([2 / 3, 1 / 2])), + ], +) +def test_iou(half_ones, reduction, ignore_index, expected): + preds = (B.arange(120) % 3).view(-1, 1) + target = (B.arange(120) % 3).view(-1, 1) + if half_ones: + preds[:60] = 1 + iou_val = iou( + preds=preds, + target=target, + ignore_index=ignore_index, + reduction=reduction, + ) + assert B.allclose(iou_val, expected, atol=1e-9) + + +# test `absent_score` +@pytest.mark.parametrize( + ["pred", "target", "ignore_index", "absent_score", "num_classes", "expected"], + [ + # Note that -1 is used as the absent_score in almost all tests here to distinguish it from the range of valid + # scores the function can return ([0., 1.] range, inclusive). + # 2 classes, class 0 is correct everywhere, class 1 is absent. + pytest.param([0], [0], None, -1.0, 2, [1.0, -1.0]), + pytest.param([0, 0], [0, 0], None, -1.0, 2, [1.0, -1.0]), + # absent_score not applied if only class 0 is present and it's the only class. + pytest.param([0], [0], None, -1.0, 1, [1.0]), + # 2 classes, class 1 is correct everywhere, class 0 is absent. + pytest.param([1], [1], None, -1.0, 2, [-1.0, 1.0]), + pytest.param([1, 1], [1, 1], None, -1.0, 2, [-1.0, 1.0]), + # When 0 index ignored, class 0 does not get a score (not even the absent_score). + pytest.param([1], [1], 0, -1.0, 2, [1.0]), + # 3 classes. Only 0 and 2 are present, and are perfectly predicted. 1 should get absent_score. + pytest.param([0, 2], [0, 2], None, -1.0, 3, [1.0, -1.0, 1.0]), + pytest.param([2, 0], [2, 0], None, -1.0, 3, [1.0, -1.0, 1.0]), + # 3 classes. Only 0 and 1 are present, and are perfectly predicted. 2 should get absent_score. 
+ pytest.param([0, 1], [0, 1], None, -1.0, 3, [1.0, 1.0, -1.0]), + pytest.param([1, 0], [1, 0], None, -1.0, 3, [1.0, 1.0, -1.0]), + # 3 classes, class 0 is 0.5 IoU, class 1 is 0 IoU (in pred but not target; should not get absent_score), class + # 2 is absent. + pytest.param([0, 1], [0, 0], None, -1.0, 3, [0.5, 0.0, -1.0]), + # 3 classes, class 0 is 0.5 IoU, class 1 is 0 IoU (in target but not pred; should not get absent_score), class + # 2 is absent. + pytest.param([0, 0], [0, 1], None, -1.0, 3, [0.5, 0.0, -1.0]), + # Sanity checks with absent_score of 1.0. + pytest.param([0, 2], [0, 2], None, 1.0, 3, [1.0, 1.0, 1.0]), + pytest.param([0, 2], [0, 2], 0, 1.0, 3, [1.0, 1.0]), + ], +) +def test_iou_absent_score(pred, target, ignore_index, absent_score, num_classes, expected): + iou_val = iou( + preds=tensor(pred), + target=tensor(target), + ignore_index=ignore_index, + absent_score=absent_score, + num_classes=num_classes, + reduction="none", + ) + assert B.allclose(iou_val, tensor(expected).to(iou_val)) + + +# example data taken from +# https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/metrics/tests/test_ranking.py +@pytest.mark.parametrize( + ["pred", "target", "ignore_index", "num_classes", "reduction", "expected"], + [ + # Ignoring an index outside of [0, num_classes-1] should have no effect. + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], None, 3, "none", [1, 1 / 2, 2 / 3]), + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], -1, 3, "none", [1, 1 / 2, 2 / 3]), + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 255, 3, "none", [1, 1 / 2, 2 / 3]), + # Ignoring a valid index drops only that index from the result. + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 0, 3, "none", [1 / 2, 2 / 3]), + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 1, 3, "none", [1, 2 / 3]), + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 2, 3, "none", [1, 1]), + # When reducing to mean or sum, the ignored index does not contribute to the output. + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 0, 3, "elementwise_mean", [7 / 12]), + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 0, 3, "sum", [7 / 6]), + ], +) +def test_iou_ignore_index(pred, target, ignore_index, num_classes, reduction, expected): + iou_val = iou( + preds=tensor(pred), + target=tensor(target), + ignore_index=ignore_index, + num_classes=num_classes, + reduction=reduction, + ) + assert B.allclose(iou_val, tensor(expected).to(iou_val)) diff --git a/RE/paddlemetric/src/tests/classification/test_kl_divergence.py b/RE/paddlemetric/src/tests/classification/test_kl_divergence.py new file mode 100644 index 00000000..b5137c3a --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_kl_divergence.py @@ -0,0 +1,114 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
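+# scipy's entropy(p, q) serves as the reference below: with a second argument it computes the
+# Kullback-Leibler divergence sum(p * log(p / q)) per row, after normalizing both distributions.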
+from collections import namedtuple +from functools import partial +from typing import Optional + +import numpy as np +import pytest +import paddleext.torchapi as B +from scipy.stats import entropy +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, EXTRA_DIM, NUM_BATCHES, MetricTester +from paddlemetrics.classification import KLDivergence +from paddlemetrics.functional import kl_divergence + +seed_all(42) + +Input = namedtuple("Input", ["p", "q"]) + +_probs_inputs = Input( + p=B.rand(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM), + q=B.rand(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM), +) + +_log_probs_inputs = Input( + p=B.rand(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM).softmax(dim=-1).log(), + q=B.rand(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM).softmax(dim=-1).log(), +) + + +def _sk_metric(p: Tensor, q: Tensor, log_prob: bool, reduction: Optional[str] = "mean"): + if log_prob: + p = p.softmax(dim=-1) + q = q.softmax(dim=-1) + res = entropy(p, q, axis=1) + if reduction == "mean": + return np.mean(res) + if reduction == "sum": + return np.sum(res) + return res + + +@pytest.mark.parametrize("reduction", ["mean", "sum"]) +@pytest.mark.parametrize( + "p, q, log_prob", [(_probs_inputs.p, _probs_inputs.q, False), (_log_probs_inputs.p, _log_probs_inputs.q, True)] +) +class TestKLDivergence(MetricTester): + atol = 1e-6 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_kldivergence(self, reduction, p, q, log_prob, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + p, + q, + KLDivergence, + partial(_sk_metric, log_prob=log_prob, reduction=reduction), + dist_sync_on_step, + metric_args=dict(log_prob=log_prob, reduction=reduction), + ) + + def test_kldivergence_functional(self, reduction, p, q, log_prob): + # todo: `num_outputs` is unused + self.run_functional_metric_test( + p, + q, + kl_divergence, + partial(_sk_metric, log_prob=log_prob, reduction=reduction), + metric_args=dict(log_prob=log_prob, reduction=reduction), + ) + + def test_kldivergence_differentiability(self, reduction, p, q, log_prob): + self.run_differentiability_test( + p, + q, + metric_module=KLDivergence, + metric_functional=kl_divergence, + metric_args=dict(log_prob=log_prob, reduction=reduction), + ) + + # KLDivergence half + cpu does not work due to missing support in B.clamp + @pytest.mark.xfail(reason="KLDivergence metric does not support cpu + half precision") + def test_kldivergence_half_cpu(self, reduction, p, q, log_prob): + self.run_precision_test_cpu(p, q, KLDivergence, kl_divergence, {"log_prob": log_prob, "reduction": reduction}) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_r2_half_gpu(self, reduction, p, q, log_prob): + self.run_precision_test_gpu(p, q, KLDivergence, kl_divergence, {"log_prob": log_prob, "reduction": reduction}) + + +def test_error_on_different_shape(): + metric = KLDivergence() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) + + +def test_error_on_multidim_tensors(): + metric = KLDivergence() + with pytest.raises(ValueError, match="Expected both p and q distribution to be 2D but got 3 and 3 respectively"): + metric(B.randn(10, 20, 5), B.randn(10, 20, 5)) diff --git a/RE/paddlemetric/src/tests/classification/test_matthews_corrcoef.py b/RE/paddlemetric/src/tests/classification/test_matthews_corrcoef.py new file mode 100644 index 
00000000..ce1a5a90 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_matthews_corrcoef.py @@ -0,0 +1,142 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import matthews_corrcoef as sk_matthews_corrcoef + +from tests.classification.inputs import _input_binary, _input_binary_prob +from tests.classification.inputs import _input_multiclass as _input_mcls +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mlb +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, THRESHOLD, MetricTester +from paddlemetrics.classification.matthews_corrcoef import MatthewsCorrcoef +from paddlemetrics.functional.classification.matthews_corrcoef import matthews_corrcoef + +seed_all(42) + + +def _sk_matthews_corrcoef_binary_prob(preds, target): + sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) + sk_target = target.view(-1).numpy() + + return sk_matthews_corrcoef(y_true=sk_target, y_pred=sk_preds) + + +def _sk_matthews_corrcoef_binary(preds, target): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_matthews_corrcoef(y_true=sk_target, y_pred=sk_preds) + + +def _sk_matthews_corrcoef_multilabel_prob(preds, target): + sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) + sk_target = target.view(-1).numpy() + + return sk_matthews_corrcoef(y_true=sk_target, y_pred=sk_preds) + + +def _sk_matthews_corrcoef_multilabel(preds, target): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_matthews_corrcoef(y_true=sk_target, y_pred=sk_preds) + + +def _sk_matthews_corrcoef_multiclass_prob(preds, target): + sk_preds = B.argmax(preds, dim=len(preds.shape) - 1).view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_matthews_corrcoef(y_true=sk_target, y_pred=sk_preds) + + +def _sk_matthews_corrcoef_multiclass(preds, target): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_matthews_corrcoef(y_true=sk_target, y_pred=sk_preds) + + +def _sk_matthews_corrcoef_multidim_multiclass_prob(preds, target): + sk_preds = B.argmax(preds, dim=len(preds.shape) - 2).view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_matthews_corrcoef(y_true=sk_target, y_pred=sk_preds) + + +def _sk_matthews_corrcoef_multidim_multiclass(preds, target): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return sk_matthews_corrcoef(y_true=sk_target, y_pred=sk_preds) + + +@pytest.mark.parametrize( + "preds, target, sk_metric, num_classes", + [ + 
(_input_binary_prob.preds, _input_binary_prob.target, _sk_matthews_corrcoef_binary_prob, 2), + (_input_binary.preds, _input_binary.target, _sk_matthews_corrcoef_binary, 2), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_matthews_corrcoef_multilabel_prob, 2), + (_input_mlb.preds, _input_mlb.target, _sk_matthews_corrcoef_multilabel, 2), + (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_matthews_corrcoef_multiclass_prob, NUM_CLASSES), + (_input_mcls.preds, _input_mcls.target, _sk_matthews_corrcoef_multiclass, NUM_CLASSES), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_matthews_corrcoef_multidim_multiclass_prob, NUM_CLASSES), + (_input_mdmc.preds, _input_mdmc.target, _sk_matthews_corrcoef_multidim_multiclass, NUM_CLASSES), + ], +) +class TestMatthewsCorrCoef(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_matthews_corrcoef(self, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=MatthewsCorrcoef, + sk_metric=sk_metric, + dist_sync_on_step=dist_sync_on_step, + metric_args={ + "num_classes": num_classes, + "threshold": THRESHOLD, + }, + ) + + def test_matthews_corrcoef_functional(self, preds, target, sk_metric, num_classes): + self.run_functional_metric_test( + preds, + target, + metric_functional=matthews_corrcoef, + sk_metric=sk_metric, + metric_args={ + "num_classes": num_classes, + "threshold": THRESHOLD, + }, + ) + + def test_matthews_corrcoef_differentiability(self, preds, target, sk_metric, num_classes): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=MatthewsCorrcoef, + metric_functional=matthews_corrcoef, + metric_args={ + "num_classes": num_classes, + "threshold": THRESHOLD, + }, + ) diff --git a/RE/paddlemetric/src/tests/classification/test_precision_recall.py b/RE/paddlemetric/src/tests/classification/test_precision_recall.py new file mode 100644 index 00000000..981b44ab --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_precision_recall.py @@ -0,0 +1,461 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
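+# Reference values come from sklearn's precision_score / recall_score; preds and targets are first routed
+# through _input_format_classification so both implementations score the same label arrays.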
+from functools import partial +from typing import Callable, Optional + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import precision_score, recall_score +from paddleext.torchapi import Tensor, tensor + +from tests.classification.inputs import _input_binary, _input_binary_logits, _input_binary_prob +from tests.classification.inputs import _input_multiclass as _input_mcls +from tests.classification.inputs import _input_multiclass_logits as _input_mcls_logits +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multiclass_with_missing_class as _input_miss_class +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mlb +from tests.classification.inputs import _input_multilabel_logits as _input_mlb_logits +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_BATCHES, NUM_CLASSES, THRESHOLD, MetricTester +from paddlemetrics import Metric, Precision, Recall +from paddlemetrics.functional import precision, precision_recall, recall +from paddlemetrics.utilities.checks import _input_format_classification +from paddlemetrics.utilities.enums import AverageMethod + +seed_all(42) + + +def _sk_prec_recall(preds, target, sk_fn, num_classes, average, multiclass, ignore_index, mdmc_average=None): + # todo: `mdmc_average` is unused + if average == "none": + average = None + if num_classes == 1: + average = "binary" + + labels = list(range(num_classes)) + try: + labels.remove(ignore_index) + except ValueError: + pass + + sk_preds, sk_target, _ = _input_format_classification( + preds, target, THRESHOLD, num_classes=num_classes, multiclass=multiclass + ) + sk_preds, sk_target = sk_preds.numpy(), sk_target.numpy() + + sk_scores = sk_fn(sk_target, sk_preds, average=average, zero_division=0, labels=labels) + + if len(labels) != num_classes and not average: + sk_scores = np.insert(sk_scores, ignore_index, np.nan) + + return sk_scores + + +def _sk_prec_recall_multidim_multiclass( + preds, target, sk_fn, num_classes, average, multiclass, ignore_index, mdmc_average +): + preds, target, _ = _input_format_classification( + preds, target, threshold=THRESHOLD, num_classes=num_classes, multiclass=multiclass + ) + + if mdmc_average == "global": + preds = B.transpose(preds, 1, 2).reshape(-1, preds.shape[1]) + target = B.transpose(target, 1, 2).reshape(-1, target.shape[1]) + + return _sk_prec_recall(preds, target, sk_fn, num_classes, average, False, ignore_index) + if mdmc_average == "samplewise": + scores = [] + + for i in range(preds.shape[0]): + pred_i = preds[i, ...].T + target_i = target[i, ...].T + scores_i = _sk_prec_recall(pred_i, target_i, sk_fn, num_classes, average, False, ignore_index) + + scores.append(np.expand_dims(scores_i, 0)) + + return np.concatenate(scores).mean(axis=0) + + +@pytest.mark.parametrize("metric, fn_metric", [(Precision, precision), (Recall, recall)]) +@pytest.mark.parametrize( + "average, mdmc_average, num_classes, ignore_index, match_str", + [ + ("wrong", None, None, None, "`average`"), + ("micro", "wrong", None, None, "`mdmc"), + ("macro", None, None, None, "number of classes"), + ("macro", None, 1, 0, "ignore_index"), + ], +) +def test_wrong_params(metric, fn_metric, average, 
mdmc_average, num_classes, ignore_index, match_str): + with pytest.raises(ValueError, match=match_str): + metric( + average=average, + mdmc_average=mdmc_average, + num_classes=num_classes, + ignore_index=ignore_index, + ) + + with pytest.raises(ValueError, match=match_str): + fn_metric( + _input_binary.preds[0], + _input_binary.target[0], + average=average, + mdmc_average=mdmc_average, + num_classes=num_classes, + ignore_index=ignore_index, + ) + + with pytest.raises(ValueError, match=match_str): + precision_recall( + _input_binary.preds[0], + _input_binary.target[0], + average=average, + mdmc_average=mdmc_average, + num_classes=num_classes, + ignore_index=ignore_index, + ) + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Recall, recall), (Precision, precision)]) +def test_zero_division(metric_class, metric_fn): + """Test that zero_division works correctly (currently should just set to 0).""" + + preds = tensor([0, 2, 1, 1]) + target = tensor([2, 1, 2, 1]) + + cl_metric = metric_class(average="none", num_classes=3) + cl_metric(preds, target) + + result_cl = cl_metric.compute() + result_fn = metric_fn(preds, target, average="none", num_classes=3) + + assert result_cl[0] == result_fn[0] == 0 + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Recall, recall), (Precision, precision)]) +def test_no_support(metric_class, metric_fn): + """This tests a rare edge case, where there is only one class present. + + in target, and ignore_index is set to exactly that class - and the + average method is equal to 'weighted'. + + This would mean that the sum of weights equals zero, and would, without + taking care of this case, return NaN. However, the reduction function + should catch that and set the metric to equal the value of zero_division + in this case (zero_division is for now not configurable and equals 0). 
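+
+    Concretely: the target below contains only class 0 and ignore_index=0, so no class is left to weight,
+    and the expected result is therefore 0.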
+ """ + + preds = tensor([1, 1, 0, 0]) + target = tensor([0, 0, 0, 0]) + + cl_metric = metric_class(average="weighted", num_classes=2, ignore_index=0) + cl_metric(preds, target) + + result_cl = cl_metric.compute() + result_fn = metric_fn(preds, target, average="weighted", num_classes=2, ignore_index=0) + + assert result_cl == result_fn == 0 + + +@pytest.mark.parametrize( + "metric_class, metric_fn, sk_fn", [(Recall, recall, recall_score), (Precision, precision, precision_score)] +) +@pytest.mark.parametrize("average", ["micro", "macro", None, "weighted", "samples"]) +@pytest.mark.parametrize("ignore_index", [None, 0]) +@pytest.mark.parametrize( + "preds, target, num_classes, multiclass, mdmc_average, sk_wrapper", + [ + (_input_binary_logits.preds, _input_binary_logits.target, 1, None, None, _sk_prec_recall), + (_input_binary_prob.preds, _input_binary_prob.target, 1, None, None, _sk_prec_recall), + (_input_binary.preds, _input_binary.target, 1, False, None, _sk_prec_recall), + (_input_mlb_logits.preds, _input_mlb_logits.target, NUM_CLASSES, None, None, _sk_prec_recall), + (_input_mlb_prob.preds, _input_mlb_prob.target, NUM_CLASSES, None, None, _sk_prec_recall), + (_input_mlb.preds, _input_mlb.target, NUM_CLASSES, False, None, _sk_prec_recall), + (_input_mcls_logits.preds, _input_mcls_logits.target, NUM_CLASSES, None, None, _sk_prec_recall), + (_input_mcls_prob.preds, _input_mcls_prob.target, NUM_CLASSES, None, None, _sk_prec_recall), + (_input_mcls.preds, _input_mcls.target, NUM_CLASSES, None, None, _sk_prec_recall), + (_input_mdmc.preds, _input_mdmc.target, NUM_CLASSES, None, "global", _sk_prec_recall_multidim_multiclass), + ( + _input_mdmc_prob.preds, + _input_mdmc_prob.target, + NUM_CLASSES, + None, + "global", + _sk_prec_recall_multidim_multiclass, + ), + (_input_mdmc.preds, _input_mdmc.target, NUM_CLASSES, None, "samplewise", _sk_prec_recall_multidim_multiclass), + ( + _input_mdmc_prob.preds, + _input_mdmc_prob.target, + NUM_CLASSES, + None, + "samplewise", + _sk_prec_recall_multidim_multiclass, + ), + ], +) +class TestPrecisionRecall(MetricTester): + @pytest.mark.parametrize("ddp", [False]) + @pytest.mark.parametrize("dist_sync_on_step", [False]) + def test_precision_recall_class( + self, + ddp: bool, + dist_sync_on_step: bool, + preds: Tensor, + target: Tensor, + sk_wrapper: Callable, + metric_class: Metric, + metric_fn: Callable, + sk_fn: Callable, + multiclass: Optional[bool], + num_classes: Optional[int], + average: str, + mdmc_average: Optional[str], + ignore_index: Optional[int], + ): + # todo: `metric_fn` is unused + if num_classes == 1 and average != "micro": + pytest.skip("Only test binary data for 'micro' avg (equivalent of 'binary' in sklearn)") + + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + if average == "weighted" and ignore_index is not None and mdmc_average is not None: + pytest.skip("Ignore special case where we are ignoring entire sample for 'weighted' average") + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=metric_class, + sk_metric=partial( + sk_wrapper, + sk_fn=sk_fn, + average=average, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + mdmc_average=mdmc_average, + ), + dist_sync_on_step=dist_sync_on_step, + metric_args={ + "num_classes": num_classes, + "average": average, + "threshold": THRESHOLD, + "multiclass": multiclass, + "ignore_index": ignore_index, + "mdmc_average": mdmc_average, + }, + 
check_dist_sync_on_step=True, + check_batch=True, + ) + + def test_precision_recall_fn( + self, + preds: Tensor, + target: Tensor, + sk_wrapper: Callable, + metric_class: Metric, + metric_fn: Callable, + sk_fn: Callable, + multiclass: Optional[bool], + num_classes: Optional[int], + average: str, + mdmc_average: Optional[str], + ignore_index: Optional[int], + ): + # todo: `metric_class` is unused + if num_classes == 1 and average != "micro": + pytest.skip("Only test binary data for 'micro' avg (equivalent of 'binary' in sklearn)") + + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + if average == "weighted" and ignore_index is not None and mdmc_average is not None: + pytest.skip("Ignore special case where we are ignoring entire sample for 'weighted' average") + + self.run_functional_metric_test( + preds, + target, + metric_functional=metric_fn, + sk_metric=partial( + sk_wrapper, + sk_fn=sk_fn, + average=average, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + mdmc_average=mdmc_average, + ), + metric_args={ + "num_classes": num_classes, + "average": average, + "threshold": THRESHOLD, + "multiclass": multiclass, + "ignore_index": ignore_index, + "mdmc_average": mdmc_average, + }, + ) + + def test_precision_recall_differentiability( + self, + preds: Tensor, + target: Tensor, + sk_wrapper: Callable, + metric_class: Metric, + metric_fn: Callable, + sk_fn: Callable, + multiclass: Optional[bool], + num_classes: Optional[int], + average: str, + mdmc_average: Optional[str], + ignore_index: Optional[int], + ): + # todo: `metric_class` is unused + if num_classes == 1 and average != "micro": + pytest.skip("Only test binary data for 'micro' avg (equivalent of 'binary' in sklearn)") + + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + if average == "weighted" and ignore_index is not None and mdmc_average is not None: + pytest.skip("Ignore special case where we are ignoring entire sample for 'weighted' average") + + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=metric_class, + metric_functional=metric_fn, + metric_args={ + "num_classes": num_classes, + "average": average, + "threshold": THRESHOLD, + "multiclass": multiclass, + "ignore_index": ignore_index, + "mdmc_average": mdmc_average, + }, + ) + + +@pytest.mark.parametrize("average", ["micro", "macro", None, "weighted", "samples"]) +def test_precision_recall_joint(average): + """A simple test of the joint precision_recall metric. + + No need to test this thorougly, as it is just a combination of precision and recall, which are already tested + thoroughly. 
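+
+    The joint result is only checked to match separate precision() and recall() calls on the same inputs.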
+ """ + + precision_result = precision( + _input_mcls_prob.preds[0], _input_mcls_prob.target[0], average=average, num_classes=NUM_CLASSES + ) + recall_result = recall( + _input_mcls_prob.preds[0], _input_mcls_prob.target[0], average=average, num_classes=NUM_CLASSES + ) + + prec_recall_result = precision_recall( + _input_mcls_prob.preds[0], _input_mcls_prob.target[0], average=average, num_classes=NUM_CLASSES + ) + + assert B.allclose(precision_result, prec_recall_result[0]) + assert B.allclose(recall_result, prec_recall_result[1]) + + +_mc_k_target = tensor([0, 1, 2]) +_mc_k_preds = tensor([[0.35, 0.4, 0.25], [0.1, 0.5, 0.4], [0.2, 0.1, 0.7]]) +_ml_k_target = tensor([[0, 1, 0], [1, 1, 0], [0, 0, 0]]) +_ml_k_preds = tensor([[0.9, 0.2, 0.75], [0.1, 0.7, 0.8], [0.6, 0.1, 0.7]]) + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Recall, recall), (Precision, precision)]) +@pytest.mark.parametrize( + "k, preds, target, average, expected_prec, expected_recall", + [ + (1, _mc_k_preds, _mc_k_target, "micro", tensor(2 / 3), tensor(2 / 3)), + (2, _mc_k_preds, _mc_k_target, "micro", tensor(1 / 2), tensor(1.0)), + (1, _ml_k_preds, _ml_k_target, "micro", tensor(0.0), tensor(0.0)), + (2, _ml_k_preds, _ml_k_target, "micro", tensor(1 / 6), tensor(1 / 3)), + ], +) +def test_top_k( + metric_class, + metric_fn, + k: int, + preds: Tensor, + target: Tensor, + average: str, + expected_prec: Tensor, + expected_recall: Tensor, +): + """A simple test to check that top_k works as expected. + + Just a sanity check, the tests in StatScores should already guarantee the correctness of results. + """ + + class_metric = metric_class(top_k=k, average=average, num_classes=3) + class_metric.update(preds, target) + + if metric_class.__name__ == "Precision": + result = expected_prec + else: + result = expected_recall + + assert B.equal(class_metric.compute(), result) + assert B.equal(metric_fn(preds, target, top_k=k, average=average, num_classes=3), result) + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Precision, precision), (Recall, recall)]) +@pytest.mark.parametrize( + "ignore_index, expected", [(None, B.tensor([1.0, np.nan])), (0, B.tensor([np.nan, np.nan]))] +) +def test_class_not_present(metric_class, metric_fn, ignore_index, expected): + """This tests that when metric is computed per class and a given class is not present in both the `preds` and + `target`, the resulting score is `nan`.""" + preds = B.tensor([0, 0, 0]) + target = B.tensor([0, 0, 0]) + num_classes = 2 + + # test functional + result_fn = metric_fn(preds, target, average=AverageMethod.NONE, num_classes=num_classes, ignore_index=ignore_index) + assert B.allclose(expected, result_fn, equal_nan=True) + + # test class + cl_metric = metric_class(average=AverageMethod.NONE, num_classes=num_classes, ignore_index=ignore_index) + cl_metric(preds, target) + result_cl = cl_metric.compute() + assert B.allclose(expected, result_cl, equal_nan=True) + + +@pytest.mark.parametrize("average", ["micro", "macro", "weighted"]) +@pytest.mark.parametrize( + "metric_class, metric_functional, sk_fn", [(Precision, precision, precision_score), (Recall, recall, recall_score)] +) +def test_same_input(metric_class, metric_functional, sk_fn, average): + preds = _input_miss_class.preds + target = _input_miss_class.target + preds_flat = B.cat(list(preds), dim=0) + target_flat = B.cat(list(target), dim=0) + + mc = metric_class(num_classes=NUM_CLASSES, average=average) + for i in range(NUM_BATCHES): + mc.update(preds[i], target[i]) + class_res = mc.compute() + func_res = 
metric_functional(preds_flat, target_flat, num_classes=NUM_CLASSES, average=average) + sk_res = sk_fn(target_flat, preds_flat, average=average, zero_division=1) + + assert B.allclose(class_res, B.tensor(sk_res).float()) + assert B.allclose(func_res, B.tensor(sk_res).float()) diff --git a/RE/paddlemetric/src/tests/classification/test_precision_recall_curve.py b/RE/paddlemetric/src/tests/classification/test_precision_recall_curve.py new file mode 100644 index 00000000..acd555ca --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_precision_recall_curve.py @@ -0,0 +1,121 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import precision_recall_curve as sk_precision_recall_curve +from paddleext.torchapi import tensor + +from tests.classification.inputs import _input_binary_prob +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, MetricTester +from paddlemetrics.classification.precision_recall_curve import PrecisionRecallCurve +from paddlemetrics.functional import precision_recall_curve + +seed_all(42) + + +def _sk_precision_recall_curve(y_true, probas_pred, num_classes=1): + """Adjusted comparison function that can also handles multiclass.""" + if num_classes == 1: + return sk_precision_recall_curve(y_true, probas_pred) + + precision, recall, thresholds = [], [], [] + for i in range(num_classes): + y_true_temp = np.zeros_like(y_true) + y_true_temp[y_true == i] = 1 + res = sk_precision_recall_curve(y_true_temp, probas_pred[:, i]) + precision.append(res[0]) + recall.append(res[1]) + thresholds.append(res[2]) + return precision, recall, thresholds + + +def _sk_prec_rc_binary_prob(preds, target, num_classes=1): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return _sk_precision_recall_curve(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes) + + +def _sk_prec_rc_multiclass_prob(preds, target, num_classes=1): + sk_preds = preds.reshape(-1, num_classes).numpy() + sk_target = target.view(-1).numpy() + + return _sk_precision_recall_curve(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes) + + +def _sk_prec_rc_multidim_multiclass_prob(preds, target, num_classes=1): + sk_preds = preds.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1).numpy() + sk_target = target.view(-1).numpy() + return _sk_precision_recall_curve(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes) + + +@pytest.mark.parametrize( + "preds, target, sk_metric, num_classes", + [ + (_input_binary_prob.preds, _input_binary_prob.target, _sk_prec_rc_binary_prob, 1), + (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_prec_rc_multiclass_prob, NUM_CLASSES), + (_input_mdmc_prob.preds, 
_input_mdmc_prob.target, _sk_prec_rc_multidim_multiclass_prob, NUM_CLASSES), + ], +) +class TestPrecisionRecallCurve(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_precision_recall_curve(self, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=PrecisionRecallCurve, + sk_metric=partial(sk_metric, num_classes=num_classes), + dist_sync_on_step=dist_sync_on_step, + metric_args={"num_classes": num_classes}, + ) + + def test_precision_recall_curve_functional(self, preds, target, sk_metric, num_classes): + self.run_functional_metric_test( + preds, + target, + metric_functional=precision_recall_curve, + sk_metric=partial(sk_metric, num_classes=num_classes), + metric_args={"num_classes": num_classes}, + ) + + def test_precision_recall_curve_differentiability(self, preds, target, sk_metric, num_classes): + self.run_differentiability_test( + preds, + target, + metric_module=PrecisionRecallCurve, + metric_functional=precision_recall_curve, + metric_args={"num_classes": num_classes}, + ) + + +@pytest.mark.parametrize( + ["pred", "target", "expected_p", "expected_r", "expected_t"], + [pytest.param([1, 2, 3, 4], [1, 0, 0, 1], [0.5, 1 / 3, 0.5, 1.0, 1.0], [1, 0.5, 0.5, 0.5, 0.0], [1, 2, 3, 4])], +) +def test_pr_curve(pred, target, expected_p, expected_r, expected_t): + p, r, t = precision_recall_curve(tensor(pred), tensor(target)) + assert p.size() == r.size() + assert p.size(0) == t.size(0) + 1 + + assert B.allclose(p, tensor(expected_p).to(p)) + assert B.allclose(r, tensor(expected_r).to(r)) + assert B.allclose(t, tensor(expected_t).to(t)) diff --git a/RE/paddlemetric/src/tests/classification/test_roc.py b/RE/paddlemetric/src/tests/classification/test_roc.py new file mode 100644 index 00000000..efe45335 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_roc.py @@ -0,0 +1,146 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
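+# Reading guide for the toy cases in test_roc_curve at the bottom of this file:
+# for pred=[0, 1], target=[0, 1], sweeping the decision threshold from high to low
+# first admits the sample scored 1 (the true positive), so tpr rises 0 -> 1 while
+# fpr stays 0, and only then admits the sample scored 0, pushing fpr to 1; hence
+# the expected tpr=[0, 1, 1] and fpr=[0, 0, 1].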
+from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import roc_curve as sk_roc_curve +from paddleext.torchapi import tensor + +from tests.classification.inputs import _input_binary_prob +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel_multidim_prob as _input_mlmd_prob +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, MetricTester +from paddlemetrics.classification.roc import ROC +from paddlemetrics.functional import roc + +seed_all(42) + + +def _sk_roc_curve(y_true, probas_pred, num_classes: int = 1, multilabel: bool = False): + """Adjusted comparison function that can also handles multiclass.""" + if num_classes == 1: + return sk_roc_curve(y_true, probas_pred, drop_intermediate=False) + + fpr, tpr, thresholds = [], [], [] + for i in range(num_classes): + if multilabel: + y_true_temp = y_true[:, i] + else: + y_true_temp = np.zeros_like(y_true) + y_true_temp[y_true == i] = 1 + + res = sk_roc_curve(y_true_temp, probas_pred[:, i], drop_intermediate=False) + fpr.append(res[0]) + tpr.append(res[1]) + thresholds.append(res[2]) + return fpr, tpr, thresholds + + +def _sk_roc_binary_prob(preds, target, num_classes=1): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + return _sk_roc_curve(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes) + + +def _sk_roc_multiclass_prob(preds, target, num_classes=1): + sk_preds = preds.reshape(-1, num_classes).numpy() + sk_target = target.view(-1).numpy() + + return _sk_roc_curve(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes) + + +def _sk_roc_multidim_multiclass_prob(preds, target, num_classes=1): + sk_preds = preds.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1).numpy() + sk_target = target.view(-1).numpy() + return _sk_roc_curve(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes) + + +def _sk_roc_multilabel_prob(preds, target, num_classes=1): + sk_preds = preds.numpy() + sk_target = target.numpy() + return _sk_roc_curve(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes, multilabel=True) + + +def _sk_roc_multilabel_multidim_prob(preds, target, num_classes=1): + sk_preds = preds.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1).numpy() + sk_target = target.transpose(0, 1).reshape(num_classes, -1).transpose(0, 1).numpy() + return _sk_roc_curve(y_true=sk_target, probas_pred=sk_preds, num_classes=num_classes, multilabel=True) + + +@pytest.mark.parametrize( + "preds, target, sk_metric, num_classes", + [ + (_input_binary_prob.preds, _input_binary_prob.target, _sk_roc_binary_prob, 1), + (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_roc_multiclass_prob, NUM_CLASSES), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_roc_multidim_multiclass_prob, NUM_CLASSES), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_roc_multilabel_prob, NUM_CLASSES), + (_input_mlmd_prob.preds, _input_mlmd_prob.target, _sk_roc_multilabel_multidim_prob, NUM_CLASSES), + ], +) +class TestROC(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_roc(self, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step): + 
self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=ROC, + sk_metric=partial(sk_metric, num_classes=num_classes), + dist_sync_on_step=dist_sync_on_step, + metric_args={"num_classes": num_classes}, + ) + + def test_roc_functional(self, preds, target, sk_metric, num_classes): + self.run_functional_metric_test( + preds, + target, + metric_functional=roc, + sk_metric=partial(sk_metric, num_classes=num_classes), + metric_args={"num_classes": num_classes}, + ) + + def test_roc_differentiability(self, preds, target, sk_metric, num_classes): + self.run_differentiability_test( + preds, + target, + metric_module=ROC, + metric_functional=roc, + metric_args={"num_classes": num_classes}, + ) + + +@pytest.mark.parametrize( + ["pred", "target", "expected_tpr", "expected_fpr"], + [ + pytest.param([0, 1], [0, 1], [0, 1, 1], [0, 0, 1]), + pytest.param([1, 0], [0, 1], [0, 0, 1], [0, 1, 1]), + pytest.param([1, 1], [1, 0], [0, 1], [0, 1]), + pytest.param([1, 0], [1, 0], [0, 1, 1], [0, 0, 1]), + pytest.param([0.5, 0.5], [0, 1], [0, 1], [0, 1]), + ], +) +def test_roc_curve(pred, target, expected_tpr, expected_fpr): + fpr, tpr, thresh = roc(tensor(pred), tensor(target)) + + assert fpr.shape == tpr.shape + assert fpr.size(0) == thresh.size(0) + assert B.allclose(fpr, tensor(expected_fpr).to(fpr)) + assert B.allclose(tpr, tensor(expected_tpr).to(tpr)) diff --git a/RE/paddlemetric/src/tests/classification/test_specificity.py b/RE/paddlemetric/src/tests/classification/test_specificity.py new file mode 100644 index 00000000..90611d06 --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_specificity.py @@ -0,0 +1,414 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
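+# Specificity is the true-negative rate, tn / (tn + fp). The _sk_spec helper below
+# rebuilds that quantity from sklearn's multilabel confusion matrices so the
+# paddlemetrics class and functional implementations can be compared against an
+# independent reference.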
+import math +from functools import partial +from typing import Callable, Optional + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import multilabel_confusion_matrix +from paddleext.torchapi import Tensor, tensor + +from tests.classification.inputs import _input_binary, _input_binary_prob +from tests.classification.inputs import _input_multiclass as _input_mcls +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mlb +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, THRESHOLD, MetricTester +from paddlemetrics import Metric, Specificity +from paddlemetrics.functional import specificity +from paddlemetrics.functional.classification.stat_scores import _reduce_stat_scores +from paddlemetrics.utilities.checks import _input_format_classification +from paddlemetrics.utilities.enums import AverageMethod + +seed_all(42) + + +def _sk_stats_score(preds, target, reduce, num_classes, multiclass, ignore_index, top_k): + preds, target, _ = _input_format_classification( + preds, target, threshold=THRESHOLD, num_classes=num_classes, multiclass=multiclass, top_k=top_k + ) + sk_preds, sk_target = preds.numpy(), target.numpy() + + if reduce != "macro" and ignore_index is not None and preds.shape[1] > 1: + sk_preds = np.delete(sk_preds, ignore_index, 1) + sk_target = np.delete(sk_target, ignore_index, 1) + + if preds.shape[1] == 1 and reduce == "samples": + sk_target = sk_target.T + sk_preds = sk_preds.T + + sk_stats = multilabel_confusion_matrix( + sk_target, sk_preds, samplewise=(reduce == "samples") and preds.shape[1] != 1 + ) + + if preds.shape[1] == 1 and reduce != "samples": + sk_stats = sk_stats[[1]].reshape(-1, 4)[:, [3, 1, 0, 2]] + else: + sk_stats = sk_stats.reshape(-1, 4)[:, [3, 1, 0, 2]] + + if reduce == "micro": + sk_stats = sk_stats.sum(axis=0, keepdims=True) + + sk_stats = np.concatenate([sk_stats, sk_stats[:, [3]] + sk_stats[:, [0]]], 1) + + if reduce == "micro": + sk_stats = sk_stats[0] + + if reduce == "macro" and ignore_index is not None and preds.shape[1]: + sk_stats[ignore_index, :] = -1 + + if reduce == "micro": + _, fp, tn, _, _ = sk_stats + else: + _, fp, tn, _ = sk_stats[:, 0], sk_stats[:, 1], sk_stats[:, 2], sk_stats[:, 3] + return fp, tn + + +def _sk_spec(preds, target, reduce, num_classes, multiclass, ignore_index, top_k=None, mdmc_reduce=None, stats=None): + + if stats: + fp, tn = stats + else: + stats = _sk_stats_score(preds, target, reduce, num_classes, multiclass, ignore_index, top_k) + fp, tn = stats + + fp, tn = tensor(fp), tensor(tn) + spec = _reduce_stat_scores( + numerator=tn, + denominator=tn + fp, + weights=None if reduce != "weighted" else tn + fp, + average=reduce, + mdmc_average=mdmc_reduce, + ) + if reduce in [None, "none"] and ignore_index is not None and preds.shape[1] > 1: + spec = spec.numpy() + spec = np.insert(spec, ignore_index, math.nan) + spec = tensor(spec) + + return spec + + +def _sk_spec_mdim_mcls(preds, target, reduce, mdmc_reduce, num_classes, multiclass, ignore_index, top_k=None): + preds, target, _ = _input_format_classification( + preds, target, threshold=THRESHOLD, num_classes=num_classes, multiclass=multiclass, 
top_k=top_k + ) + + if mdmc_reduce == "global": + preds = B.transpose(preds, 1, 2).reshape(-1, preds.shape[1]) + target = B.transpose(target, 1, 2).reshape(-1, target.shape[1]) + return _sk_spec(preds, target, reduce, num_classes, False, ignore_index, top_k, mdmc_reduce) + fp, tn = [], [] + stats = [] + + for i in range(preds.shape[0]): + pred_i = preds[i, ...].T + target_i = target[i, ...].T + fp_i, tn_i = _sk_stats_score(pred_i, target_i, reduce, num_classes, False, ignore_index, top_k) + fp.append(fp_i) + tn.append(tn_i) + + stats.append(fp) + stats.append(tn) + return _sk_spec(preds[0], target[0], reduce, num_classes, multiclass, ignore_index, top_k, mdmc_reduce, stats) + + +@pytest.mark.parametrize("metric, fn_metric", [(Specificity, specificity)]) +@pytest.mark.parametrize( + "average, mdmc_average, num_classes, ignore_index, match_str", + [ + ("wrong", None, None, None, "`average`"), + ("micro", "wrong", None, None, "`mdmc"), + ("macro", None, None, None, "number of classes"), + ("macro", None, 1, 0, "ignore_index"), + ], +) +def test_wrong_params(metric, fn_metric, average, mdmc_average, num_classes, ignore_index, match_str): + with pytest.raises(ValueError, match=match_str): + metric( + average=average, + mdmc_average=mdmc_average, + num_classes=num_classes, + ignore_index=ignore_index, + ) + + with pytest.raises(ValueError, match=match_str): + fn_metric( + _input_binary.preds[0], + _input_binary.target[0], + average=average, + mdmc_average=mdmc_average, + num_classes=num_classes, + ignore_index=ignore_index, + ) + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Specificity, specificity)]) +def test_zero_division(metric_class, metric_fn): + """Test that zero_division works correctly (currently should just set to 0).""" + + preds = tensor([1, 2, 1, 1]) + target = tensor([0, 0, 0, 0]) + + cl_metric = metric_class(average="none", num_classes=3) + cl_metric(preds, target) + + result_cl = cl_metric.compute() + result_fn = metric_fn(preds, target, average="none", num_classes=3) + + assert result_cl[0] == result_fn[0] == 0 + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Specificity, specificity)]) +def test_no_support(metric_class, metric_fn): + """This tests a rare edge case, where there is only one class present. + + in target, and ignore_index is set to exactly that class - and the + average method is equal to 'weighted'. + + This would mean that the sum of weights equals zero, and would, without + taking care of this case, return NaN. However, the reduction function + should catch that and set the metric to equal the value of zero_division + in this case (zero_division is for now not configurable and equals 0). 
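+
+    In other words, every class that could contribute a non-zero weight is excluded
+    via ignore_index, so the sum of weights is zero and the reduction falls back to
+    the zero_division value of 0.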
+ """ + + preds = tensor([1, 1, 0, 0]) + target = tensor([0, 0, 0, 0]) + + cl_metric = metric_class(average="weighted", num_classes=2, ignore_index=1) + cl_metric(preds, target) + + result_cl = cl_metric.compute() + result_fn = metric_fn(preds, target, average="weighted", num_classes=2, ignore_index=1) + + assert result_cl == result_fn == 0 + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Specificity, specificity)]) +@pytest.mark.parametrize("average", ["micro", "macro", None, "weighted", "samples"]) +@pytest.mark.parametrize("ignore_index", [None, 0]) +@pytest.mark.parametrize( + "preds, target, num_classes, multiclass, mdmc_average, sk_wrapper", + [ + (_input_binary_prob.preds, _input_binary_prob.target, 1, None, None, _sk_spec), + (_input_binary.preds, _input_binary.target, 1, False, None, _sk_spec), + (_input_mlb_prob.preds, _input_mlb_prob.target, NUM_CLASSES, None, None, _sk_spec), + (_input_mlb.preds, _input_mlb.target, NUM_CLASSES, False, None, _sk_spec), + (_input_mcls_prob.preds, _input_mcls_prob.target, NUM_CLASSES, None, None, _sk_spec), + (_input_mcls.preds, _input_mcls.target, NUM_CLASSES, None, None, _sk_spec), + (_input_mdmc.preds, _input_mdmc.target, NUM_CLASSES, None, "global", _sk_spec_mdim_mcls), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, NUM_CLASSES, None, "global", _sk_spec_mdim_mcls), + (_input_mdmc.preds, _input_mdmc.target, NUM_CLASSES, None, "samplewise", _sk_spec_mdim_mcls), + (_input_mdmc_prob.preds, _input_mdmc_prob.target, NUM_CLASSES, None, "samplewise", _sk_spec_mdim_mcls), + ], +) +class TestSpecificity(MetricTester): + @pytest.mark.parametrize("ddp", [False, True]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_specificity_class( + self, + ddp: bool, + dist_sync_on_step: bool, + preds: Tensor, + target: Tensor, + sk_wrapper: Callable, + metric_class: Metric, + metric_fn: Callable, + multiclass: Optional[bool], + num_classes: Optional[int], + average: str, + mdmc_average: Optional[str], + ignore_index: Optional[int], + ): + # todo: `metric_fn` is unused + if num_classes == 1 and average != "micro": + pytest.skip("Only test binary data for 'micro' avg (equivalent of 'binary' in sklearn)") + + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + if average == "weighted" and ignore_index is not None and mdmc_average is not None: + pytest.skip("Ignore special case where we are ignoring entire sample for 'weighted' average") + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=metric_class, + sk_metric=partial( + sk_wrapper, + reduce=average, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + mdmc_reduce=mdmc_average, + ), + dist_sync_on_step=dist_sync_on_step, + metric_args={ + "num_classes": num_classes, + "average": average, + "threshold": THRESHOLD, + "multiclass": multiclass, + "ignore_index": ignore_index, + "mdmc_average": mdmc_average, + }, + check_dist_sync_on_step=True, + check_batch=True, + ) + + def test_specificity_fn( + self, + preds: Tensor, + target: Tensor, + sk_wrapper: Callable, + metric_class: Metric, + metric_fn: Callable, + multiclass: Optional[bool], + num_classes: Optional[int], + average: str, + mdmc_average: Optional[str], + ignore_index: Optional[int], + ): + # todo: `metric_class` is unused + if num_classes == 1 and average != "micro": + pytest.skip("Only test binary data for 'micro' avg (equivalent of 'binary' in sklearn)") + + if ignore_index is not None 
and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + if average == "weighted" and ignore_index is not None and mdmc_average is not None: + pytest.skip("Ignore special case where we are ignoring entire sample for 'weighted' average") + + self.run_functional_metric_test( + preds, + target, + metric_functional=metric_fn, + sk_metric=partial( + sk_wrapper, + reduce=average, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + mdmc_reduce=mdmc_average, + ), + metric_args={ + "num_classes": num_classes, + "average": average, + "threshold": THRESHOLD, + "multiclass": multiclass, + "ignore_index": ignore_index, + "mdmc_average": mdmc_average, + }, + ) + + def test_accuracy_differentiability( + self, + preds: Tensor, + target: Tensor, + sk_wrapper: Callable, + metric_class: Metric, + metric_fn: Callable, + multiclass: Optional[bool], + num_classes: Optional[int], + average: str, + mdmc_average: Optional[str], + ignore_index: Optional[int], + ): + + if num_classes == 1 and average != "micro": + pytest.skip("Only test binary data for 'micro' avg (equivalent of 'binary' in sklearn)") + + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + if average == "weighted" and ignore_index is not None and mdmc_average is not None: + pytest.skip("Ignore special case where we are ignoring entire sample for 'weighted' average") + + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=metric_class, + metric_functional=metric_fn, + metric_args={ + "num_classes": num_classes, + "average": average, + "threshold": THRESHOLD, + "multiclass": multiclass, + "ignore_index": ignore_index, + "mdmc_average": mdmc_average, + }, + ) + + +_mc_k_target = tensor([0, 1, 2]) +_mc_k_preds = tensor([[0.35, 0.4, 0.25], [0.1, 0.5, 0.4], [0.2, 0.1, 0.7]]) +_ml_k_target = tensor([[0, 1, 0], [1, 1, 0], [0, 0, 0]]) +_ml_k_preds = tensor([[0.9, 0.2, 0.75], [0.1, 0.7, 0.8], [0.6, 0.1, 0.7]]) + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Specificity, specificity)]) +@pytest.mark.parametrize( + "k, preds, target, average, expected_spec", + [ + (1, _mc_k_preds, _mc_k_target, "micro", tensor(5 / 6)), + (2, _mc_k_preds, _mc_k_target, "micro", tensor(1 / 2)), + (1, _ml_k_preds, _ml_k_target, "micro", tensor(1 / 2)), + (2, _ml_k_preds, _ml_k_target, "micro", tensor(1 / 6)), + ], +) +def test_top_k( + metric_class, + metric_fn, + k: int, + preds: Tensor, + target: Tensor, + average: str, + expected_spec: Tensor, +): + """A simple test to check that top_k works as expected. + + Just a sanity check, the tests in StatScores should already guarantee the correctness of results. 
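+
+    With top_k=k the k highest-scoring classes of every sample are treated as
+    predicted positives rather than only the argmax, which is why the expected
+    specificity changes between k=1 and k=2.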
+ """ + + class_metric = metric_class(top_k=k, average=average, num_classes=3) + class_metric.update(preds, target) + + assert B.equal(class_metric.compute(), expected_spec) + assert B.equal(metric_fn(preds, target, top_k=k, average=average, num_classes=3), expected_spec) + + +@pytest.mark.parametrize("metric_class, metric_fn", [(Specificity, specificity)]) +@pytest.mark.parametrize( + "ignore_index, expected", [(None, B.tensor([0.0, np.nan])), (0, B.tensor([np.nan, np.nan]))] +) +def test_class_not_present(metric_class, metric_fn, ignore_index, expected): + """This tests that when metric is computed per class and a given class is not present in both the `preds` and + `target`, the resulting score is `nan`.""" + preds = B.tensor([0, 0, 0]) + target = B.tensor([0, 0, 0]) + num_classes = 2 + + # test functional + result_fn = metric_fn(preds, target, average=AverageMethod.NONE, num_classes=num_classes, ignore_index=ignore_index) + assert B.allclose(expected, result_fn, equal_nan=True) + + # test class + cl_metric = metric_class(average=AverageMethod.NONE, num_classes=num_classes, ignore_index=ignore_index) + cl_metric(preds, target) + result_cl = cl_metric.compute() + assert B.allclose(expected, result_cl, equal_nan=True) diff --git a/RE/paddlemetric/src/tests/classification/test_stat_scores.py b/RE/paddlemetric/src/tests/classification/test_stat_scores.py new file mode 100644 index 00000000..c0e2656c --- /dev/null +++ b/RE/paddlemetric/src/tests/classification/test_stat_scores.py @@ -0,0 +1,323 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from functools import partial +from typing import Callable, Optional + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import multilabel_confusion_matrix +from paddleext.torchapi import Tensor, tensor + +from tests.classification.inputs import _input_binary, _input_binary_logits, _input_binary_prob, _input_multiclass +from tests.classification.inputs import _input_multiclass_logits as _input_mcls_logits +from tests.classification.inputs import _input_multiclass_prob as _input_mcls_prob +from tests.classification.inputs import _input_multidim_multiclass as _input_mdmc +from tests.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob +from tests.classification.inputs import _input_multilabel as _input_mcls +from tests.classification.inputs import _input_multilabel_logits as _input_mlb_logits +from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob +from tests.helpers import seed_all +from tests.helpers.testers import NUM_CLASSES, MetricTester +from paddlemetrics import StatScores +from paddlemetrics.functional import stat_scores +from paddlemetrics.utilities.checks import _input_format_classification + +seed_all(42) + + +def _sk_stat_scores(preds, target, reduce, num_classes, multiclass, ignore_index, top_k, threshold, mdmc_reduce=None): + # todo: `mdmc_reduce` is unused + preds, target, _ = _input_format_classification( + preds, target, threshold=threshold, num_classes=num_classes, multiclass=multiclass, top_k=top_k + ) + sk_preds, sk_target = preds.numpy(), target.numpy() + + if reduce != "macro" and ignore_index is not None and preds.shape[1] > 1: + sk_preds = np.delete(sk_preds, ignore_index, 1) + sk_target = np.delete(sk_target, ignore_index, 1) + + if preds.shape[1] == 1 and reduce == "samples": + sk_target = sk_target.T + sk_preds = sk_preds.T + + sk_stats = multilabel_confusion_matrix( + sk_target, sk_preds, samplewise=(reduce == "samples") and preds.shape[1] != 1 + ) + + if preds.shape[1] == 1 and reduce != "samples": + sk_stats = sk_stats[[1]].reshape(-1, 4)[:, [3, 1, 0, 2]] + else: + sk_stats = sk_stats.reshape(-1, 4)[:, [3, 1, 0, 2]] + + if reduce == "micro": + sk_stats = sk_stats.sum(axis=0, keepdims=True) + + sk_stats = np.concatenate([sk_stats, sk_stats[:, [3]] + sk_stats[:, [0]]], 1) + + if reduce == "micro": + sk_stats = sk_stats[0] + + if reduce == "macro" and ignore_index is not None and preds.shape[1]: + sk_stats[ignore_index, :] = -1 + + return sk_stats + + +def _sk_stat_scores_mdim_mcls( + preds, target, reduce, mdmc_reduce, num_classes, multiclass, ignore_index, top_k, threshold +): + preds, target, _ = _input_format_classification( + preds, target, threshold=threshold, num_classes=num_classes, multiclass=multiclass, top_k=top_k + ) + + if mdmc_reduce == "global": + preds = B.transpose(preds, 1, 2).reshape(-1, preds.shape[1]) + target = B.transpose(target, 1, 2).reshape(-1, target.shape[1]) + + return _sk_stat_scores(preds, target, reduce, None, False, ignore_index, top_k, threshold) + if mdmc_reduce == "samplewise": + scores = [] + + for i in range(preds.shape[0]): + pred_i = preds[i, ...].T + target_i = target[i, ...].T + scores_i = _sk_stat_scores(pred_i, target_i, reduce, None, False, ignore_index, top_k, threshold) + + scores.append(np.expand_dims(scores_i, 0)) + + return np.concatenate(scores) + + +@pytest.mark.parametrize( + "reduce, mdmc_reduce, num_classes, inputs, ignore_index", + [ + ["unknown", None, None, _input_binary, None], + ["micro", "unknown", None, 
_input_binary, None], + ["macro", None, None, _input_binary, None], + ["micro", None, None, _input_mdmc_prob, None], + ["micro", None, None, _input_binary_prob, 0], + ["micro", None, None, _input_mcls_prob, NUM_CLASSES], + ["micro", None, NUM_CLASSES, _input_mcls_prob, NUM_CLASSES], + ], +) +def test_wrong_params(reduce, mdmc_reduce, num_classes, inputs, ignore_index): + """Test a combination of parameters that are invalid and should raise an error. + + This includes invalid ``reduce`` and ``mdmc_reduce`` parameter values, not setting ``num_classes`` when + ``reduce='macro'`, not setting ``mdmc_reduce`` when inputs are multi-dim multi-class``, setting ``ignore_index`` + when inputs are binary, as well as setting ``ignore_index`` to a value higher than the number of classes. + """ + with pytest.raises(ValueError): + stat_scores( + inputs.preds[0], inputs.target[0], reduce, mdmc_reduce, num_classes=num_classes, ignore_index=ignore_index + ) + + with pytest.raises(ValueError): + sts = StatScores(reduce=reduce, mdmc_reduce=mdmc_reduce, num_classes=num_classes, ignore_index=ignore_index) + sts(inputs.preds[0], inputs.target[0]) + + +@pytest.mark.parametrize("ignore_index", [None, 0]) +@pytest.mark.parametrize("reduce", ["micro", "macro", "samples"]) +@pytest.mark.parametrize( + "preds, target, sk_fn, mdmc_reduce, num_classes, multiclass, top_k, threshold", + [ + (_input_binary_logits.preds, _input_binary_logits.target, _sk_stat_scores, None, 1, None, None, 0.0), + (_input_binary_prob.preds, _input_binary_prob.target, _sk_stat_scores, None, 1, None, None, 0.5), + (_input_binary.preds, _input_binary.target, _sk_stat_scores, None, 1, False, None, 0.5), + (_input_mlb_logits.preds, _input_mlb_logits.target, _sk_stat_scores, None, NUM_CLASSES, None, None, 0.0), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_stat_scores, None, NUM_CLASSES, None, None, 0.5), + (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_stat_scores, None, NUM_CLASSES, None, 2, 0.5), + (_input_mcls.preds, _input_mcls.target, _sk_stat_scores, None, NUM_CLASSES, False, None, 0.5), + (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_stat_scores, None, NUM_CLASSES, None, None, 0.5), + (_input_mcls_logits.preds, _input_mcls_logits.target, _sk_stat_scores, None, NUM_CLASSES, None, None, 0.0), + (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_stat_scores, None, NUM_CLASSES, None, 2, 0.0), + (_input_multiclass.preds, _input_multiclass.target, _sk_stat_scores, None, NUM_CLASSES, None, None, 0.0), + (_input_mdmc.preds, _input_mdmc.target, _sk_stat_scores_mdim_mcls, "samplewise", NUM_CLASSES, None, None, 0.0), + ( + _input_mdmc_prob.preds, + _input_mdmc_prob.target, + _sk_stat_scores_mdim_mcls, + "samplewise", + NUM_CLASSES, + None, + None, + 0.0, + ), + (_input_mdmc.preds, _input_mdmc.target, _sk_stat_scores_mdim_mcls, "global", NUM_CLASSES, None, None, 0.0), + ( + _input_mdmc_prob.preds, + _input_mdmc_prob.target, + _sk_stat_scores_mdim_mcls, + "global", + NUM_CLASSES, + None, + None, + 0.0, + ), + ], +) +class TestStatScores(MetricTester): + # DDP tests temporarily disabled due to hanging issues + @pytest.mark.parametrize("ddp", [False]) + @pytest.mark.parametrize("dist_sync_on_step", [False]) #True, + def test_stat_scores_class( + self, + ddp: bool, + dist_sync_on_step: bool, + sk_fn: Callable, + preds: Tensor, + target: Tensor, + reduce: str, + mdmc_reduce: Optional[str], + num_classes: Optional[int], + multiclass: Optional[bool], + ignore_index: Optional[int], + top_k: Optional[int], + threshold: Optional[float], + 
): + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=StatScores, + sk_metric=partial( + sk_fn, + reduce=reduce, + mdmc_reduce=mdmc_reduce, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + top_k=top_k, + threshold=threshold, + ), + dist_sync_on_step=dist_sync_on_step, + metric_args={ + "num_classes": num_classes, + "reduce": reduce, + "mdmc_reduce": mdmc_reduce, + "threshold": threshold, + "multiclass": multiclass, + "ignore_index": ignore_index, + "top_k": top_k, + }, + check_dist_sync_on_step=True, + check_batch=True, + ) + + def test_stat_scores_fn( + self, + sk_fn: Callable, + preds: Tensor, + target: Tensor, + reduce: str, + mdmc_reduce: Optional[str], + num_classes: Optional[int], + multiclass: Optional[bool], + ignore_index: Optional[int], + top_k: Optional[int], + threshold: Optional[float], + ): + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + self.run_functional_metric_test( + preds, + target, + metric_functional=stat_scores, + sk_metric=partial( + sk_fn, + reduce=reduce, + mdmc_reduce=mdmc_reduce, + num_classes=num_classes, + multiclass=multiclass, + ignore_index=ignore_index, + top_k=top_k, + threshold=threshold, + ), + metric_args={ + "num_classes": num_classes, + "reduce": reduce, + "mdmc_reduce": mdmc_reduce, + "threshold": threshold, + "multiclass": multiclass, + "ignore_index": ignore_index, + "top_k": top_k, + }, + ) + + def test_stat_scores_differentiability( + self, + sk_fn: Callable, + preds: Tensor, + target: Tensor, + reduce: str, + mdmc_reduce: Optional[str], + num_classes: Optional[int], + multiclass: Optional[bool], + ignore_index: Optional[int], + top_k: Optional[int], + threshold: Optional[float], + ): + if ignore_index is not None and preds.ndim == 2: + pytest.skip("Skipping ignore_index test with binary inputs.") + + self.run_differentiability_test( + preds, + target, + metric_module=StatScores, + metric_functional=stat_scores, + metric_args={ + "num_classes": num_classes, + "reduce": reduce, + "mdmc_reduce": mdmc_reduce, + "threshold": threshold, + "multiclass": multiclass, + "ignore_index": ignore_index, + "top_k": top_k, + }, + ) + + +_mc_k_target = tensor([0, 1, 2]) +_mc_k_preds = tensor([[0.35, 0.4, 0.25], [0.1, 0.5, 0.4], [0.2, 0.1, 0.7]]) +_ml_k_target = tensor([[0, 1, 0], [1, 1, 0], [0, 0, 0]]) +_ml_k_preds = tensor([[0.9, 0.2, 0.75], [0.1, 0.7, 0.8], [0.6, 0.1, 0.7]]) + + +@pytest.mark.parametrize( + "k, preds, target, reduce, expected", + [ + (1, _mc_k_preds, _mc_k_target, "micro", tensor([2, 1, 5, 1, 3])), + (2, _mc_k_preds, _mc_k_target, "micro", tensor([3, 3, 3, 0, 3])), + (1, _ml_k_preds, _ml_k_target, "micro", tensor([0, 3, 3, 3, 3])), + (2, _ml_k_preds, _ml_k_target, "micro", tensor([1, 5, 1, 2, 3])), + (1, _mc_k_preds, _mc_k_target, "macro", tensor([[0, 1, 1], [0, 1, 0], [2, 1, 2], [1, 0, 0], [1, 1, 1]])), + (2, _mc_k_preds, _mc_k_target, "macro", tensor([[1, 1, 1], [1, 1, 1], [1, 1, 1], [0, 0, 0], [1, 1, 1]])), + (1, _ml_k_preds, _ml_k_target, "macro", tensor([[0, 0, 0], [1, 0, 2], [1, 1, 1], [1, 2, 0], [1, 2, 0]])), + (2, _ml_k_preds, _ml_k_target, "macro", tensor([[0, 1, 0], [2, 0, 3], [0, 1, 0], [1, 1, 0], [1, 2, 0]])), + ], +) +def test_top_k(k: int, preds: Tensor, target: Tensor, reduce: str, expected: Tensor): + """A simple test to check that top_k works as expected.""" + + 
class_metric = StatScores(top_k=k, reduce=reduce, num_classes=3) + class_metric.update(preds, target) + + assert B.allclose(class_metric.compute(), expected.T) + assert B.allclose(stat_scores(preds, target, top_k=k, reduce=reduce, num_classes=3), expected.T) diff --git a/RE/paddlemetric/src/tests/functional/__init__.py b/RE/paddlemetric/src/tests/functional/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/functional/test_classification.py b/RE/paddlemetric/src/tests/functional/test_classification.py new file mode 100644 index 00000000..d2048387 --- /dev/null +++ b/RE/paddlemetric/src/tests/functional/test_classification.py @@ -0,0 +1,123 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytest +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor + +from tests.helpers import seed_all +from paddlemetrics.functional import dice_score +from paddlemetrics.functional.classification.precision_recall_curve import _binary_clf_curve +from paddlemetrics.utilities.data import get_num_classes, to_categorical, to_onehot + + +def test_onehot(): + test_tensor = tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]) + expected = B.stack( + [ + B.cat([B.eye(5, dtype=int), B.zeros((5, 5), dtype=int)]), + B.cat([B.zeros((5, 5), dtype=int), B.eye(5, dtype=int)]), + ] + ) + + assert tuple(test_tensor.shape) == (2, 5) + assert tuple(expected.shape) == (2, 10, 5) + + onehot_classes = to_onehot(test_tensor, num_classes=10) + onehot_no_classes = to_onehot(test_tensor) + + assert B.allclose(onehot_classes, onehot_no_classes) + + assert onehot_classes.shape == expected.shape + assert onehot_no_classes.shape == expected.shape + + assert B.allclose(expected.to(onehot_no_classes), onehot_no_classes) + assert B.allclose(expected.to(onehot_classes), onehot_classes) + + +def test_to_categorical(): + test_tensor = B.stack( + [ + B.cat([B.eye(5, dtype=int), B.zeros((5, 5), dtype=int)]), + B.cat([B.zeros((5, 5), dtype=int), B.eye(5, dtype=int)]), + ] + ).to(B.float) + + expected = tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]) + assert tuple(expected.shape) == (2, 5) + assert tuple(test_tensor.shape) == (2, 10, 5) + + result = to_categorical(test_tensor) + + assert result.shape == expected.shape + assert B.allclose(result, expected.to(result.dtype)) + + +@pytest.mark.parametrize( + ["preds", "target", "num_classes", "expected_num_classes"], + [ + pytest.param(B.rand(32, 10, 28, 28), B.randint(10, (32, 28, 28)), 10, 10), + pytest.param(B.rand(32, 10, 28, 28), B.randint(10, (32, 28, 28)), None, 10), + pytest.param(B.rand(32, 28, 28), B.randint(10, (32, 28, 28)), None, 10), + ], +) +def test_get_num_classes(preds, target, num_classes, expected_num_classes): + assert get_num_classes(preds, target, num_classes) == expected_num_classes + + +@pytest.mark.parametrize( + ["sample_weight", "pos_label", "exp_shape"], + [ + pytest.param(1, 1.0, 42), + pytest.param(None, 1.0, 42), + ], +) +def test_binary_clf_curve(sample_weight, 
pos_label, exp_shape): + # TODO: move back the pred and target to test func arguments + # if you fix the array inside the function, you'd also have fix the shape, + # because when the array changes, you also have to fix the shape + seed_all(0) + pred = B.randint(low=51, high=99, size=(100,), dtype=B.float) / 100 + target = tensor([0, 1] * 50, dtype=B.int) + if sample_weight is not None: + sample_weight = B.ones_like(pred) * sample_weight + + fps, tps, thresh = _binary_clf_curve(preds=pred, target=target, sample_weights=sample_weight, pos_label=pos_label) + + assert isinstance(tps, Tensor) + assert isinstance(fps, Tensor) + assert isinstance(thresh, Tensor) + if B.platform() == "torch": + assert tuple(tps.shape) == (exp_shape,) + assert tuple(fps.shape) == (exp_shape,) + assert tuple(thresh.shape) == (exp_shape,) + elif B.platform() == "paddle": + assert tuple(tps.shape) == (exp_shape - 1,) + assert tuple(fps.shape) == (exp_shape - 1,) + assert tuple(thresh.shape) == (exp_shape - 1,) + else: + raise Exception(f"unknown platform {B.platform()}") + + +@pytest.mark.parametrize( + ["pred", "target", "expected"], + [ + pytest.param([[0, 0], [1, 1]], [[0, 0], [1, 1]], 1.0), + pytest.param([[1, 1], [0, 0]], [[0, 0], [1, 1]], 0.0), + pytest.param([[1, 1], [1, 1]], [[1, 1], [0, 0]], 2 / 3), + pytest.param([[1, 1], [0, 0]], [[1, 1], [0, 0]], 1.0), + ], +) +def test_dice_score(pred, target, expected): + score = dice_score(tensor(pred), tensor(target)) + assert score == expected diff --git a/RE/paddlemetric/src/tests/functional/test_image_gradients.py b/RE/paddlemetric/src/tests/functional/test_image_gradients.py new file mode 100644 index 00000000..b4bad7f1 --- /dev/null +++ b/RE/paddlemetric/src/tests/functional/test_image_gradients.py @@ -0,0 +1,110 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
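+# image_gradients, as exercised here, returns forward differences: dy[..., i, j] is
+# image[..., i + 1, j] - image[..., i, j] and dx[..., i, j] is
+# image[..., i, j + 1] - image[..., i, j], with the last row of dy and the last
+# column of dx zero-filled. For the arange-based 5x5 test image this yields rows of
+# 5.0 in dy and columns of 1.0 in dx, matching true_dy and true_dx below.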
+import pytest +import paddleext.torchapi as B +from paddleext.torchapi import Tensor + +from paddlemetrics.functional import image_gradients + + +def test_invalid_input_img_type(): + """Test Whether the module successfully handles invalid input data type.""" + invalid_dummy_input = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] + + with pytest.raises(TypeError): + image_gradients(invalid_dummy_input) + + +def test_invalid_input_ndims(): + """Test whether the module successfully handles invalid number of dimensions of input tensor.""" + + BATCH_SIZE = 1 + HEIGHT = 5 + WIDTH = 5 + CHANNELS = 1 + + image = B.arange(0, BATCH_SIZE * HEIGHT * WIDTH * CHANNELS, dtype=B.float32) + image = B.reshape(image, (HEIGHT, WIDTH)) + + with pytest.raises(RuntimeError): + image_gradients(image) + + +def test_multi_batch_image_gradients(): + """Test whether the module correctly calculates gradients for known input with non-unity batch size.Example + input-output pair taken from TF's implementation of i mage-gradients.""" + + BATCH_SIZE = 5 + HEIGHT = 5 + WIDTH = 5 + CHANNELS = 1 + + single_channel_img = B.arange(0, 1 * HEIGHT * WIDTH * CHANNELS, dtype=B.float32) + single_channel_img = B.reshape(single_channel_img, (CHANNELS, HEIGHT, WIDTH)) + image = B.stack([single_channel_img for _ in range(BATCH_SIZE)], dim=0) + + true_dy = [ + [5.0, 5.0, 5.0, 5.0, 5.0], + [5.0, 5.0, 5.0, 5.0, 5.0], + [5.0, 5.0, 5.0, 5.0, 5.0], + [5.0, 5.0, 5.0, 5.0, 5.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + ] + true_dy = B.to_tensor(true_dy) + + dy, dx = image_gradients(image) + + for batch_id in range(BATCH_SIZE): + assert B.allclose(dy[batch_id, 0, :, :], true_dy) + assert tuple(dy.shape) == (BATCH_SIZE, 1, HEIGHT, WIDTH) + assert tuple(dx.shape) == (BATCH_SIZE, 1, HEIGHT, WIDTH) + + +def test_image_gradients(): + """Test whether the module correctly calculates gradients for known input. + + Example input-output pair taken from TF's implementation of image- gradients + """ + + BATCH_SIZE = 1 + HEIGHT = 5 + WIDTH = 5 + CHANNELS = 1 + + image = B.arange(0, BATCH_SIZE * HEIGHT * WIDTH * CHANNELS, dtype=B.float32) + image = B.reshape(image, (BATCH_SIZE, CHANNELS, HEIGHT, WIDTH)) + + true_dy = [ + [5.0, 5.0, 5.0, 5.0, 5.0], + [5.0, 5.0, 5.0, 5.0, 5.0], + [5.0, 5.0, 5.0, 5.0, 5.0], + [5.0, 5.0, 5.0, 5.0, 5.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + ] + + true_dx = [ + [1.0, 1.0, 1.0, 1.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 0.0], + ] + + true_dy = B.to_tensor(true_dy) + true_dx = B.to_tensor(true_dx) + + dy, dx = image_gradients(image) + + assert B.allclose(dy.squeeze(), true_dy), "dy fails test" + assert B.allclose(dx.squeeze(), true_dx), "dx fails tests" diff --git a/RE/paddlemetric/src/tests/functional/test_reduction.py b/RE/paddlemetric/src/tests/functional/test_reduction.py new file mode 100644 index 00000000..729cd5b9 --- /dev/null +++ b/RE/paddlemetric/src/tests/functional/test_reduction.py @@ -0,0 +1,41 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import pytest +import paddleext.torchapi as B + +from paddlemetrics.utilities.distributed import class_reduce, reduce + + +def test_reduce(): + start_tensor = B.rand(50, 40, 30) + + assert B.allclose(reduce(start_tensor, "elementwise_mean"), B.mean(start_tensor)) + assert B.allclose(reduce(start_tensor, "sum"), B.sum(start_tensor)) + assert B.allclose(reduce(start_tensor, "none"), start_tensor) + + with pytest.raises(ValueError): + reduce(start_tensor, "error_reduction") + + +def test_class_reduce(): + num = B.randint(1, 10, (100,)).float() + denom = B.randint(10, 20, (100,)).float() + weights = B.randint(1, 100, (100,)).float() + + assert B.allclose(class_reduce(num, denom, weights, "micro"), B.sum(num) / B.sum(denom)) + assert B.allclose(class_reduce(num, denom, weights, "macro"), B.mean(num / denom)) + assert B.allclose( + class_reduce(num, denom, weights, "weighted"), B.sum(num / denom * (weights / B.sum(weights))) + ) + assert B.allclose(class_reduce(num, denom, weights, "none"), num / denom) diff --git a/RE/paddlemetric/src/tests/functional/test_self_supervised.py b/RE/paddlemetric/src/tests/functional/test_self_supervised.py new file mode 100644 index 00000000..4c675192 --- /dev/null +++ b/RE/paddlemetric/src/tests/functional/test_self_supervised.py @@ -0,0 +1,46 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
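+# embedding_similarity, checked against sklearn below, builds the pairwise
+# similarity matrix of the batch ("cosine" similarities or raw "dot" products);
+# reduction="mean" or "sum" then collapses each row of that matrix, mirroring the
+# sklearn_embedding_distance reference defined inside the test.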
+import pytest +import paddleext.torchapi as B +from sklearn.metrics import pairwise +from paddleext.torchapi import tensor + +from paddlemetrics.functional import embedding_similarity + + +@pytest.mark.parametrize("similarity", ["cosine", "dot"]) +@pytest.mark.parametrize("reduction", ["none", "mean", "sum"]) +def test_against_sklearn(similarity, reduction): + """Compare PL metrics to sklearn version.""" + device = "cuda" if B.cuda.is_available() else "cpu" + + batch = B.randn(5, 10, device=device) # 100 samples in 10 dimensions + + pl_dist = embedding_similarity(batch, similarity=similarity, reduction=reduction, zero_diagonal=False) + + def sklearn_embedding_distance(batch, similarity, reduction): + + metric_func = {"cosine": pairwise.cosine_similarity, "dot": pairwise.linear_kernel}[similarity] + + dist = metric_func(batch, batch) + if reduction == "mean": + return dist.mean(axis=-1) + if reduction == "sum": + return dist.sum(axis=-1) + return dist + + sk_dist = sklearn_embedding_distance(batch.cpu().detach().numpy(), similarity=similarity, reduction=reduction) + sk_dist = tensor(sk_dist, dtype=B.float, device=device) + + assert B.allclose(sk_dist, pl_dist) diff --git a/RE/paddlemetric/src/tests/helpers/__init__.py b/RE/paddlemetric/src/tests/helpers/__init__.py new file mode 100644 index 00000000..3773a49f --- /dev/null +++ b/RE/paddlemetric/src/tests/helpers/__init__.py @@ -0,0 +1,20 @@ +import operator +import random + +import numpy +import paddleext.torchapi as B + +from paddlemetrics.utilities.imports import _TORCH_LOWER_1_4, _TORCH_LOWER_1_5, _TORCH_LOWER_1_6, _compare_version + +_MARK_TORCH_MIN_1_4 = dict(condition=_TORCH_LOWER_1_4, reason="required PT >= 1.4") +_MARK_TORCH_MIN_1_5 = dict(condition=_TORCH_LOWER_1_5, reason="required PT >= 1.5") +_MARK_TORCH_MIN_1_6 = dict(condition=_TORCH_LOWER_1_6, reason="required PT >= 1.6") + +_LIGHTNING_GREATER_EQUAL_1_3 = _compare_version("pytorch_lightning", operator.ge, "1.3.0") + + +def seed_all(seed): + random.seed(seed) + numpy.random.seed(seed) + B.manual_seed(seed) + B.cuda.manual_seed_all(seed) diff --git a/RE/paddlemetric/src/tests/helpers/non_sklearn_metrics.py b/RE/paddlemetric/src/tests/helpers/non_sklearn_metrics.py new file mode 100644 index 00000000..fa4f8429 --- /dev/null +++ b/RE/paddlemetric/src/tests/helpers/non_sklearn_metrics.py @@ -0,0 +1,187 @@ +"""File for non sklearn metrics that are to be used for reference for tests.""" +from typing import Optional, Union + +import numpy as np +from sklearn.metrics._regression import _check_reg_targets +from sklearn.utils import assert_all_finite, check_consistent_length, column_or_1d + + +def symmetric_mean_absolute_percentage_error( + y_true: np.ndarray, + y_pred: np.ndarray, + sample_weight: Optional[np.ndarray] = None, + multioutput: str = "uniform_average", +): + r"""Symmetric mean absolute percentage error regression loss (SMAPE_): + + .. math:: \text{SMAPE} = \frac{2}{n}\sum_1^n\frac{max(| y_i - \hat{y_i} |}{| y_i | + | \hat{y_i} |, \epsilon)} + + Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a tensor of predictions. + + Args: + y_true: array-like of shape (n_samples,) or (n_samples, n_outputs) + Ground truth (correct) target values. + y_pred: array-like of shape (n_samples,) or (n_samples, n_outputs) + Estimated target values. + sample_weight: array-like of shape (n_samples,), default=None + Sample weights. + multioutput: {'raw_values', 'uniform_average'} or array-like + Defines aggregating of multiple output values. 
+ Array-like value defines weights used to average errors. + If input is list then the shape must be (n_outputs,). + + - 'raw_values': Returns a full set of errors in case of multioutput input. + - 'uniform_average': Errors of all outputs are averaged with uniform weight. + + Returns: + loss: float or ndarray of floats in the range [0, 1] + If multioutput is 'raw_values', then symmetric mean absolute percentage error + is returned for each output separately. + If multioutput is 'uniform_average' or an ndarray of weights, then the + weighted average of all output errors is returned. + MAPE output is non-negative floating point. The best value is 0.0. + But note the fact that bad predictions can lead to arbitarily large + MAPE values, especially if some y_true values are very close to zero. + Note that we return a large value instead of `inf` when y_true is zero. + + """ + _, y_true, y_pred, multioutput = _check_reg_targets(y_true, y_pred, multioutput) + check_consistent_length(y_true, y_pred, sample_weight) + epsilon = np.finfo(np.float64).eps + smape = 2 * np.abs(y_pred - y_true) / np.maximum(np.abs(y_true) + np.abs(y_pred), epsilon) + output_errors = np.average(smape, weights=sample_weight, axis=0) + if isinstance(multioutput, str): + if multioutput == "raw_values": + return output_errors + # pass None as weights to np.average: uniform mean + multioutput = None + + return np.average(output_errors, weights=multioutput) + + +# sklearn reference function from +# https://github.com/samronsin/scikit-learn/blob/calibration-loss/sklearn/metrics/_classification.py. +# TODO: when the PR into sklearn is accepted, update this to use the official function. +def calibration_error( + y_true: np.ndarray, + y_prob: np.ndarray, + sample_weight: Optional[np.ndarray] = None, + norm: str = "l2", + n_bins: int = 10, + strategy: str = "uniform", + pos_label: Optional[Union[int, str]] = None, + reduce_bias: bool = True, +) -> float: + """Compute calibration error of a binary classifier. Across all items in a set of N predictions, the + calibration error measures the aggregated difference between (1) the average predicted probabilities assigned + to the positive class, and (2) the frequencies of the positive class in the actual outcome. The calibration + error is only appropriate for binary categorical outcomes. Which label is considered to be the positive label + is controlled via the parameter pos_label, which defaults to 1. + + Args: + y_true: array-like of shape (n_samples,) + True targets of a binary classification task. + y_prob: array-like of (n_samples,) + Probabilities of the positive class. + sample_weight: array-like of shape (n_samples,) + norm: {'l1', 'l2', 'max'} + Norm method. The l1-norm is the Expected Calibration Error (ECE), + and the max-norm corresponds to Maximum Calibration Error (MCE). + n_bins: int, default=10 + The number of bins to compute error on. + strategy: {'uniform', 'quantile'} + Strategy used to define the widths of the bins. + uniform + All bins have identical widths. + quantile + All bins have the same number of points. + pos_label: int or str, default=None + Label of the positive class. If None, the maximum label is used as positive class. + reduce_bias: bool, default=True + Add debiasing term as in Verified Uncertainty Calibration, A. Kumar. + Only effective for the l2-norm. 
+ + Returns: + score: float with calibration error + """ + y_true = column_or_1d(y_true) + y_prob = column_or_1d(y_prob) + assert_all_finite(y_true) + assert_all_finite(y_prob) + check_consistent_length(y_true, y_prob, sample_weight) + if any(y_prob < 0) or any(y_prob > 1): + raise ValueError("y_prob has values outside of [0, 1] range") + + labels = np.unique(y_true) + if len(labels) > 2: + raise ValueError("Only binary classification is supported. " "Provided labels %s." % labels) + + if pos_label is None: + pos_label = y_true.max() + if pos_label not in labels: + raise ValueError("pos_label=%r is not a valid label: " "%r" % (pos_label, labels)) + y_true = np.array(y_true == pos_label, int) + + norm_options = ("l1", "l2", "max") + if norm not in norm_options: + raise ValueError(f"norm has to be one of {norm_options}, got: {norm}.") + + remapping = np.argsort(y_prob) + y_true = y_true[remapping] + y_prob = y_prob[remapping] + if sample_weight is not None: + sample_weight = sample_weight[remapping] + else: + sample_weight = np.ones(y_true.shape[0]) + + n_bins = int(n_bins) + if strategy == "quantile": + quantiles = np.percentile(y_prob, np.arange(0, 1, 1.0 / n_bins) * 100) + elif strategy == "uniform": + quantiles = np.arange(0, 1, 1.0 / n_bins) + else: + raise ValueError( + f"Invalid entry to 'strategy' input. \ + The strategy must be either quantile' or 'uniform'. Got {strategy} instead." + ) + + threshold_indices = np.searchsorted(y_prob, quantiles).tolist() + threshold_indices.append(y_true.shape[0]) + avg_pred_true = np.zeros(n_bins) + bin_centroid = np.zeros(n_bins) + delta_count = np.zeros(n_bins) + debias = np.zeros(n_bins) + + loss = 0.0 + count = float(sample_weight.sum()) + for i, i_start in enumerate(threshold_indices[:-1]): + i_end = threshold_indices[i + 1] + # ignore empty bins + if i_end == i_start: + continue + delta_count[i] = float(sample_weight[i_start:i_end].sum()) + avg_pred_true[i] = np.dot(y_true[i_start:i_end], sample_weight[i_start:i_end]) / delta_count[i] + bin_centroid[i] = np.dot(y_prob[i_start:i_end], sample_weight[i_start:i_end]) / delta_count[i] + if norm == "l2" and reduce_bias: + # NOTE: I think there's a mistake in the original implementation. + # delta_debias = ( + # avg_pred_true[i] * (avg_pred_true[i] - 1) * delta_count[i] + # ) + # delta_debias /= (count * delta_count[i] - 1) + delta_debias = avg_pred_true[i] * (avg_pred_true[i] - 1) * delta_count[i] + delta_debias /= count * (delta_count[i] - 1) + debias[i] = delta_debias + + if norm == "max": + loss = np.max(np.abs(avg_pred_true - bin_centroid)) + elif norm == "l1": + delta_loss = np.abs(avg_pred_true - bin_centroid) * delta_count + loss = np.sum(delta_loss) / count + elif norm == "l2": + delta_loss = (avg_pred_true - bin_centroid) ** 2 * delta_count + loss = np.sum(delta_loss) / count + if reduce_bias: + # convert nans to zero + loss += np.sum(np.nan_to_num(debias)) + loss = np.sqrt(max(loss, 0.0)) + return loss diff --git a/RE/paddlemetric/src/tests/helpers/testers.py b/RE/paddlemetric/src/tests/helpers/testers.py new file mode 100644 index 00000000..02f237c8 --- /dev/null +++ b/RE/paddlemetric/src/tests/helpers/testers.py @@ -0,0 +1,578 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import pickle +import sys +from functools import partial +from typing import Any, Callable, Dict, Optional, Sequence + +import numpy as np +import pytest +import paddleext.torchapi as B +from paddleext.torchapi import Tensor, tensor +from multiprocessing import Pool, set_start_method + +from paddlemetrics import Metric + +try: + set_start_method("spawn") +except RuntimeError: + pass + +NUM_PROCESSES = 2 +NUM_BATCHES = 10 +BATCH_SIZE = 32 +NUM_CLASSES = 5 +EXTRA_DIM = 3 +THRESHOLD = 0.5 + +MAX_PORT = 8100 +START_PORT = 8088 +CURRENT_PORT = START_PORT + + +def setup_ddp(rank, world_size): + """Setup ddp environment.""" + global CURRENT_PORT + + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = str(CURRENT_PORT) + + CURRENT_PORT += 1 + if CURRENT_PORT > MAX_PORT: + CURRENT_PORT = START_PORT + + if B.distributed.is_available() and sys.platform not in ("win32", "cygwin"): + B.distributed.init_process_group("gloo", rank=rank, world_size=world_size) + + +def _assert_allclose(pl_result: Any, sk_result: Any, atol: float = 1e-8, key: Optional[str] = None) -> None: + """Utility function for recursively asserting that two results are within a certain tolerance.""" + # single output compare + if isinstance(pl_result, Tensor): + assert np.allclose(pl_result.detach().cpu().numpy(), sk_result, atol=atol, equal_nan=True) + # multi output compare + elif isinstance(pl_result, Sequence): + for pl_res, sk_res in zip(pl_result, sk_result): + _assert_allclose(pl_res, sk_res, atol=atol) + elif isinstance(pl_result, Dict): + if key is None: + raise KeyError("Provide Key for Dict based metric results.") + assert np.allclose(pl_result[key].detach().cpu().numpy(), sk_result, atol=atol, equal_nan=True) + else: + raise ValueError("Unknown format for comparison") + + +def _assert_tensor(pl_result: Any, key: Optional[str] = None) -> None: + """Utility function for recursively checking that some input only consists of torch tensors.""" + if isinstance(pl_result, Sequence): + for plr in pl_result: + _assert_tensor(plr) + elif isinstance(pl_result, Dict): + if key is None: + raise KeyError("Provide Key for Dict based metric results.") + assert isinstance(pl_result[key], Tensor) + else: + assert isinstance(pl_result, Tensor) + + +def _assert_requires_grad(metric: Metric, pl_result: Any, key: Optional[str] = None) -> None: + """Utility function for recursively asserting that metric output is consistent with the `is_differentiable` + attribute.""" + if isinstance(pl_result, Sequence): + for plr in pl_result: + _assert_requires_grad(metric, plr, key=key) + elif isinstance(pl_result, Dict): + if key is None: + raise KeyError("Provide Key for Dict based metric results.") + assert metric.is_differentiable == pl_result[key].requires_grad + else: + assert metric.is_differentiable == pl_result.requires_grad, f"{metric.is_differentiable} vs {pl_result.requires_grad}" + + +def _class_test( + rank: int, + worldsize: int, + preds: Tensor, + target: Tensor, + metric_class: Metric, + sk_metric: Callable, + dist_sync_on_step: bool, + metric_args: dict = None, + check_dist_sync_on_step: bool = True, + 
check_batch: bool = True, + atol: float = 1e-8, + device: str = "cpu", + fragment_kwargs: bool = False, + check_scriptable: bool = True, + **kwargs_update: Any, +): + """Utility function doing the actual comparison between lightning class metric and reference metric. + + Args: + rank: rank of current process + worldsize: number of processes + preds: torch tensor with predictions + target: torch tensor with targets + metric_class: lightning metric class that should be tested + sk_metric: callable function that is used for comparison + dist_sync_on_step: bool, if true will synchronize metric state across + processes at each ``forward()`` + metric_args: dict with additional arguments used for class initialization + check_dist_sync_on_step: bool, if true will check if the metric is also correctly + calculated per batch per device (and not just at the end) + check_batch: bool, if true will check if the metric is also correctly + calculated across devices for each batch (and not just at the end) + device: determine which device to run on, either 'cuda' or 'cpu' + fragment_kwargs: whether tensors in kwargs should be divided as `preds` and `target` among processes + kwargs_update: Additional keyword arguments that will be passed with preds and + target when running update on the metric. + """ + assert preds.shape[0] == target.shape[0] + num_batches = preds.shape[0] + + if not metric_args: + metric_args = {} + + # Instantiate lightning metric + metric = metric_class( + compute_on_step=check_dist_sync_on_step or check_batch, dist_sync_on_step=dist_sync_on_step, **metric_args + ) + with pytest.raises(RuntimeError): + metric.is_differentiable = not metric.is_differentiable + with pytest.raises(RuntimeError): + metric.higher_is_better = not metric.higher_is_better + + # check that the metric is scriptable + # if check_scriptable: + # B.jit.script(metric) + + # move to device + metric = metric.to(device) + preds = preds.to(device) + target = target.to(device) + kwargs_update = {k: v.to(device) if isinstance(v, Tensor) else v for k, v in kwargs_update.items()} + + # verify metrics work after being loaded from pickled state +# pickled_metric = pickle.dumps(metric) +# metric = pickle.loads(pickled_metric) + + for i in range(rank, num_batches, worldsize): + batch_kwargs_update = {k: v[i] if isinstance(v, Tensor) else v for k, v in kwargs_update.items()} + + batch_result = metric(preds[i], target[i], **batch_kwargs_update) + + if metric.dist_sync_on_step and check_dist_sync_on_step and rank == 0: + ddp_preds = B.cat([preds[i + r] for r in range(worldsize)]).cpu() + ddp_target = B.cat([target[i + r] for r in range(worldsize)]).cpu() + ddp_kwargs_upd = { + k: B.cat([v[i + r] for r in range(worldsize)]).cpu() if isinstance(v, Tensor) else v + for k, v in (kwargs_update if fragment_kwargs else batch_kwargs_update).items() + } + + sk_batch_result = sk_metric(ddp_preds, ddp_target, **ddp_kwargs_upd) + _assert_allclose(batch_result, sk_batch_result, atol=atol) + + elif check_batch and not metric.dist_sync_on_step: + batch_kwargs_update = { + k: v.cpu() if isinstance(v, Tensor) else v + for k, v in (batch_kwargs_update if fragment_kwargs else kwargs_update).items() + } + sk_batch_result = sk_metric(preds[i].cpu(), target[i].cpu(), **batch_kwargs_update) + _assert_allclose(batch_result, sk_batch_result, atol=atol) + + # check that metrics are hashable + assert hash(metric) + + # check on all batches on all ranks + result = metric.compute() + _assert_tensor(result) + + total_preds = B.cat([preds[i] for i in 
range(num_batches)]).cpu() + total_target = B.cat([target[i] for i in range(num_batches)]).cpu() + total_kwargs_update = { + k: B.cat([v[i] for i in range(num_batches)]).cpu() if isinstance(v, Tensor) else v + for k, v in kwargs_update.items() + } + sk_result = sk_metric(total_preds, total_target, **total_kwargs_update) + + # assert after aggregation + _assert_allclose(result, sk_result, atol=atol) + + +def _functional_test( + preds: Tensor, + target: Tensor, + metric_functional: Callable, + sk_metric: Callable, + metric_args: dict = None, + atol: float = 1e-8, + device: str = "cpu", + fragment_kwargs: bool = False, + **kwargs_update, +): + """Utility function doing the actual comparison between lightning functional metric and reference metric. + + Args: + preds: torch tensor with predictions + target: torch tensor with targets + metric_functional: lightning metric functional that should be tested + sk_metric: callable function that is used for comparison + metric_args: dict with additional arguments used for class initialization + device: determine which device to run on, either 'cuda' or 'cpu' + fragment_kwargs: whether tensors in kwargs should be divided as `preds` and `target` among processes + kwargs_update: Additional keyword arguments that will be passed with preds and + target when running update on the metric. + """ + assert preds.shape[0] == target.shape[0] + num_batches = preds.shape[0] + + if not metric_args: + metric_args = {} + + metric = partial(metric_functional, **metric_args) + + # move to device + preds = preds.to(device) + target = target.to(device) + kwargs_update = {k: v.to(device) if isinstance(v, Tensor) else v for k, v in kwargs_update.items()} + + for i in range(num_batches): + extra_kwargs = {k: v[i] if isinstance(v, Tensor) else v for k, v in kwargs_update.items()} + lightning_result = metric(preds[i], target[i], **extra_kwargs) + extra_kwargs = { + k: v.cpu() if isinstance(v, Tensor) else v + for k, v in (extra_kwargs if fragment_kwargs else kwargs_update).items() + } + sk_result = sk_metric(preds[i].cpu(), target[i].cpu(), **extra_kwargs) + + # assert its the same + _assert_allclose(lightning_result, sk_result, atol=atol) + + +def _assert_half_support( + metric_module: Optional[Metric], + metric_functional: Optional[Callable], + preds: Tensor, + target: Tensor, + device: str = "cpu", + **kwargs_update, +): + """Test if an metric can be used with half precision tensors. + + Args: + metric_module: the metric module to test + metric_functional: the metric functional to test + preds: torch tensor with predictions + target: torch tensor with targets + device: determine device, either "cpu" or "cuda" + kwargs_update: Additional keyword arguments that will be passed with preds and + target when running update on the metric. + """ + y_hat = preds[0].half().to(device) if preds[0].is_floating_point() else preds[0].to(device) + y = target[0].half().to(device) if target[0].is_floating_point() else target[0].to(device) + kwargs_update = { + k: (v[0].half() if v.is_floating_point() else v[0]).to(device) if isinstance(v, Tensor) else v + for k, v in kwargs_update.items() + } + if metric_module is not None: + metric_module = metric_module.to(device) + _assert_tensor(metric_module(y_hat, y, **kwargs_update)) + if metric_functional is not None: + _assert_tensor(metric_functional(y_hat, y, **kwargs_update)) + + +gpu_device_name = "cuda" if B.platform() == "torch" else "gpu" + +class MetricTester: + """Class used for efficiently run alot of parametrized tests in ddp mode. 
Makes sure that ddp is only setup + once and that pool of processes are used for all tests. + + All tests should subclass from this and implement a new method called `test_metric_name` where the method + `self.run_metric_test` is called inside. + """ + + atol = 1e-8 + + def setup_class(self): + """Setup the metric class. + + This will spawn the pool of workers that are used for metric testing and setup_ddp + """ + + self.poolSize = NUM_PROCESSES + self.pool = Pool(processes=self.poolSize) + self.pool.starmap(setup_ddp, [(rank, self.poolSize) for rank in range(self.poolSize)]) + + def teardown_class(self): + """Close pool of workers.""" + self.pool.close() + self.pool.join() + + def run_functional_metric_test( + self, + preds: Tensor, + target: Tensor, + metric_functional: Callable, + sk_metric: Callable, + metric_args: dict = None, + fragment_kwargs: bool = False, + **kwargs_update, + ): + """Main method that should be used for testing functions. Call this inside testing method. + + Args: + preds: torch tensor with predictions + target: torch tensor with targets + metric_functional: lightning metric class that should be tested + sk_metric: callable function that is used for comparison + metric_args: dict with additional arguments used for class initialization + fragment_kwargs: whether tensors in kwargs should be divided as `preds` and `target` among processes + kwargs_update: Additional keyword arguments that will be passed with preds and + target when running update on the metric. + """ + device = gpu_device_name if (B.cuda.is_available() and B.cuda.device_count() > 0) else "cpu" + + _functional_test( + preds=preds, + target=target, + metric_functional=metric_functional, + sk_metric=sk_metric, + metric_args=metric_args, + atol=self.atol, + device=device, + fragment_kwargs=fragment_kwargs, + **kwargs_update, + ) + + def run_class_metric_test( + self, + ddp: bool, + preds: Tensor, + target: Tensor, + metric_class: Metric, + sk_metric: Callable, + dist_sync_on_step: bool, + metric_args: dict = None, + check_dist_sync_on_step: bool = True, + check_batch: bool = True, + fragment_kwargs: bool = False, + check_scriptable: bool = True, + **kwargs_update, + ): + """Main method that should be used for testing class. Call this inside testing methods. + + Args: + ddp: bool, if running in ddp mode or not + preds: torch tensor with predictions + target: torch tensor with targets + metric_class: lightning metric class that should be tested + sk_metric: callable function that is used for comparison + dist_sync_on_step: bool, if true will synchronize metric state across + processes at each ``forward()`` + metric_args: dict with additional arguments used for class initialization + check_dist_sync_on_step: bool, if true will check if the metric is also correctly + calculated per batch per device (and not just at the end) + check_batch: bool, if true will check if the metric is also correctly + calculated across devices for each batch (and not just at the end) + fragment_kwargs: whether tensors in kwargs should be divided as `preds` and `target` among processes + kwargs_update: Additional keyword arguments that will be passed with preds and + target when running update on the metric. 
+ """ + if not metric_args: + metric_args = {} + if ddp: + if sys.platform == "win32": + pytest.skip("DDP not supported on windows") + + self.pool.starmap( + partial( + _class_test, + preds=preds, + target=target, + metric_class=metric_class, + sk_metric=sk_metric, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + check_dist_sync_on_step=check_dist_sync_on_step, + check_batch=check_batch, + atol=self.atol, + fragment_kwargs=fragment_kwargs, + check_scriptable=check_scriptable, + **kwargs_update, + ), + [(rank, self.poolSize) for rank in range(self.poolSize)], + ) + else: + device = gpu_device_name if (B.cuda.is_available() and B.cuda.device_count() > 0) else "cpu" + + _class_test( + rank=0, + worldsize=1, + preds=preds, + target=target, + metric_class=metric_class, + sk_metric=sk_metric, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + check_dist_sync_on_step=check_dist_sync_on_step, + check_batch=check_batch, + atol=self.atol, + device=device, + fragment_kwargs=fragment_kwargs, + check_scriptable=check_scriptable, + **kwargs_update, + ) + + @staticmethod + def run_precision_test_cpu( + preds: Tensor, + target: Tensor, + metric_module: Optional[Metric] = None, + metric_functional: Optional[Callable] = None, + metric_args: Optional[dict] = None, + **kwargs_update, + ): + """Test if a metric can be used with half precision tensors on cpu + Args: + preds: torch tensor with predictions + target: torch tensor with targets + metric_module: the metric module to test + metric_functional: the metric functional to test + metric_args: dict with additional arguments used for class initialization + kwargs_update: Additional keyword arguments that will be passed with preds and + target when running update on the metric. + """ + metric_args = metric_args or {} + _assert_half_support( + metric_module(**metric_args) if metric_module is not None else None, + metric_functional, + preds, + target, + device="cpu", + **kwargs_update, + ) + + @staticmethod + def run_precision_test_gpu( + preds: Tensor, + target: Tensor, + metric_module: Optional[Metric] = None, + metric_functional: Optional[Callable] = None, + metric_args: Optional[dict] = None, + **kwargs_update, + ): + """Test if a metric can be used with half precision tensors on gpu + Args: + preds: torch tensor with predictions + target: torch tensor with targets + metric_module: the metric module to test + metric_functional: the metric functional to test + metric_args: dict with additional arguments used for class initialization + kwargs_update: Additional keyword arguments that will be passed with preds and + target when running update on the metric. + """ + metric_args = metric_args or {} + _assert_half_support( + metric_module(**metric_args) if metric_module is not None else None, + metric_functional, + preds, + target, + device=gpu_device_name, + **kwargs_update, + ) + + @staticmethod + def run_differentiability_test( + preds: Tensor, + target: Tensor, + metric_module: Metric, + metric_functional: Optional[Callable] = None, + metric_args: Optional[dict] = None, + ): + """Test if a metric is differentiable or not. 
+ + Args: + preds: torch tensor with predictions + target: torch tensor with targets + metric_module: the metric module to test + metric_args: dict with additional arguments used for class initialization + """ + metric_args = metric_args or {} + # only floating point tensors can require grad + metric = metric_module(**metric_args) + if preds.is_floating_point(): + preds.requires_grad = True + out = metric(preds[0], target[0]) + + # Check if requires_grad matches is_differentiable attribute + # _assert_requires_grad(metric, out) + + if metric.is_differentiable and metric_functional is not None: + # check for numerical correctness + assert B.autograd.gradcheck( + partial(metric_functional, **metric_args), (preds[0].double(), target[0]) + ) + + # reset as else it will carry over to other tests + preds.requires_grad = False + + +class DummyMetric(Metric): + name = "Dummy" + + def __init__(self): + super().__init__() + self.add_state("x", tensor(0.0), dist_reduce_fx=None) + + def update(self): + pass + + def compute(self): + pass + + +class DummyListMetric(Metric): + name = "DummyList" + + def __init__(self): + super().__init__() + self.add_state("x", [], dist_reduce_fx=None) + + def update(self): + pass + + def compute(self): + pass + + +class DummyMetricSum(DummyMetric): + def update(self, x): + self.x += x + + def compute(self): + return self.x + + +class DummyMetricDiff(DummyMetric): + def update(self, y): + self.x -= y + + def compute(self): + return self.x + + +class DummyMetricMultiOutput(DummyMetricSum): + def compute(self): + return [self.x, self.x] diff --git a/RE/paddlemetric/src/tests/image/__init__.py b/RE/paddlemetric/src/tests/image/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/image/test_fid.py b/RE/paddlemetric/src/tests/image/test_fid.py new file mode 100644 index 00000000..fe76a105 --- /dev/null +++ b/RE/paddlemetric/src/tests/image/test_fid.py @@ -0,0 +1,156 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
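The `Dummy*` classes that close the shared tester helpers above are stripped-down `Metric` subclasses used to exercise the state machinery (`add_state`, `update`, `compute`). For orientation, here is a minimal sketch of that same pattern outside the test suite; `RunningSum` is a hypothetical name, and the snippet assumes `paddlemetrics` and `paddleext` are importable on PYTHONPATH:

```python
import paddleext.torchapi as B
from paddlemetrics import Metric


class RunningSum(Metric):
    """Same add_state/update/compute pattern as the Dummy* helpers above."""

    def __init__(self):
        super().__init__()
        # single scalar state; "sum" lets distributed runs reduce partial states
        self.add_state("total", B.tensor(0.0), dist_reduce_fx="sum")

    def update(self, x):
        self.total += x

    def compute(self):
        return self.total


metric = RunningSum()
for value in (1.0, 2.0, 3.0):
    metric.update(B.tensor(value))
print(metric.compute())  # tensor(6.)
```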
+import pickle + +import pytest +import paddleext.torchapi as B +from scipy.linalg import sqrtm as scipy_sqrtm +from B.utils.data import Dataset + +from paddlemetrics.image.fid import FID, sqrtm +from paddlemetrics.utilities.imports import _TORCH_FIDELITY_AVAILABLE + +B.manual_seed(42) + + +@pytest.mark.parametrize("matrix_size", [2, 10, 100, 500]) +def test_matrix_sqrt(matrix_size): + """test that metrix sqrt function works as expected.""" + + def generate_cov(n): + data = B.randn(2 * n, n) + return (data - data.mean(dim=0)).T @ (data - data.mean(dim=0)) + + cov1 = generate_cov(matrix_size) + cov2 = generate_cov(matrix_size) + + scipy_res = scipy_sqrtm((cov1 @ cov2).numpy()).real + tm_res = sqrtm(cov1 @ cov2) + assert B.allclose(B.tensor(scipy_res).float(), tm_res, atol=1e-3) + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_no_train(): + """Assert that metric never leaves evaluation mode.""" + + class MyModel(B.nn.Module): + def __init__(self): + super().__init__() + self.metric = FID() + + def forward(self, x): + return x + + model = MyModel() + model.train() + assert model.training + assert not model.metric.inception.training, "FID metric was changed to training mode which should not happen" + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_fid_pickle(): + """Assert that we can initialize the metric and pickle it.""" + metric = FID() + assert metric + + # verify metrics work after being loaded from pickled state + pickled_metric = pickle.dumps(metric) + metric = pickle.loads(pickled_metric) + + +def test_fid_raises_errors_and_warnings(): + """Test that expected warnings and errors are raised.""" + with pytest.warns( + UserWarning, + match="Metric `FID` will save all extracted features in buffer." + " For large datasets this may lead to large memory footprint.", + ): + _ = FID() + + if _TORCH_FIDELITY_AVAILABLE: + with pytest.raises(ValueError, match="Integer input to argument `feature` must be one of .*"): + _ = FID(feature=2) + else: + with pytest.raises( + ValueError, + match="FID metric requires that Torch-fidelity is installed." 
+ "Either install as `pip install paddlemetrics[image-quality]`" + " or `pip install torch-fidelity`", + ): + _ = FID() + + with pytest.raises(TypeError, match="Got unknown input to argument `feature`"): + _ = FID(feature=[1, 2]) + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +@pytest.mark.parametrize("feature", [64, 192, 768, 2048]) +def test_fid_same_input(feature): + """if real and fake are update on the same data the fid score should be + 0.""" + metric = FID(feature=feature) + + for _ in range(2): + img = B.randint(0, 255, (10, 3, 299, 299), dtype=B.uint8) + metric.update(img, real=True) + metric.update(img, real=False) + + assert B.allclose(B.cat(metric.real_features, dim=0), B.cat(metric.fake_features, dim=0)) + + val = metric.compute() + assert B.allclose(val, B.zeros_like(val), atol=1e-3) + + +class _ImgDataset(Dataset): + def __init__(self, imgs): + self.imgs = imgs + + def __getitem__(self, idx): + return self.imgs[idx] + + def __len__(self): + return self.imgs.shape[0] + + +@pytest.mark.skipif(not B.cuda.is_available(), reason="test is too slow without gpu") +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_compare_fid(tmpdir, feature=2048): + """check that the hole pipeline give the same result as torch-fidelity.""" + from torch_fidelity import calculate_metrics + + metric = FID(feature=feature).cuda() + + # Generate some synthetic data + img1 = B.randint(0, 180, (100, 3, 299, 299), dtype=B.uint8) + img2 = B.randint(100, 255, (100, 3, 299, 299), dtype=B.uint8) + + batch_size = 10 + for i in range(img1.shape[0] // batch_size): + metric.update(img1[batch_size * i : batch_size * (i + 1)].cuda(), real=True) + + for i in range(img2.shape[0] // batch_size): + metric.update(img2[batch_size * i : batch_size * (i + 1)].cuda(), real=False) + + torch_fid = calculate_metrics( + input1=_ImgDataset(img1), + input2=_ImgDataset(img2), + fid=True, + feature_layer_fid=str(feature), + batch_size=batch_size, + save_cpu_ram=True, + ) + + tm_res = metric.compute() + + assert B.allclose(tm_res.cpu(), B.tensor([torch_fid["frechet_inception_distance"]]), atol=1e-3) diff --git a/RE/paddlemetric/src/tests/image/test_inception.py b/RE/paddlemetric/src/tests/image/test_inception.py new file mode 100644 index 00000000..4bfd5db6 --- /dev/null +++ b/RE/paddlemetric/src/tests/image/test_inception.py @@ -0,0 +1,125 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
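The image-quality tests below are all guarded by `_TORCH_FIDELITY_AVAILABLE`, so they are skipped when the optional torch-fidelity backend is not installed. As a rough sketch of how such a flag can be derived (the real definition lives in `paddlemetrics.utilities.imports` and may be implemented differently), one can probe for the package without importing it eagerly:

```python
import importlib.util

import pytest

# Assumed sketch: detect the optional dependency without importing it eagerly.
_TORCH_FIDELITY_AVAILABLE = importlib.util.find_spec("torch_fidelity") is not None


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
def test_requires_torch_fidelity():
    import torch_fidelity  # only imported when the package is actually installed

    assert torch_fidelity is not None
```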
+import pickle + +import pytest +import paddleext.torchapi as B +from B.utils.data import Dataset + +from paddlemetrics.image.inception import IS +from paddlemetrics.utilities.imports import _TORCH_FIDELITY_AVAILABLE + +B.manual_seed(42) + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_no_train(): + """Assert that metric never leaves evaluation mode.""" + + class MyModel(B.nn.Module): + def __init__(self): + super().__init__() + self.metric = IS() + + def forward(self, x): + return x + + model = MyModel() + model.train() + assert model.training + assert not model.metric.inception.training, "IS metric was changed to training mode which should not happen" + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_is_pickle(): + """Assert that we can initialize the metric and pickle it.""" + metric = IS() + assert metric + + # verify metrics work after being loaded from pickled state + pickled_metric = pickle.dumps(metric) + metric = pickle.loads(pickled_metric) + + +def test_is_raises_errors_and_warnings(): + """Test that expected warnings and errors are raised.""" + with pytest.warns( + UserWarning, + match="Metric `IS` will save all extracted features in buffer." + " For large datasets this may lead to large memory footprint.", + ): + IS() + + if _TORCH_FIDELITY_AVAILABLE: + with pytest.raises(ValueError, match="Integer input to argument `feature` must be one of .*"): + _ = IS(feature=2) + else: + with pytest.raises( + ValueError, + match="IS metric requires that Torch-fidelity is installed." + "Either install as `pip install paddlemetrics[image-quality]`" + " or `pip install torch-fidelity`", + ): + IS() + + with pytest.raises(TypeError, match="Got unknown input to argument `feature`"): + IS(feature=[1, 2]) + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_is_update_compute(): + metric = IS() + + for _ in range(2): + img = B.randint(0, 255, (10, 3, 299, 299), dtype=B.uint8) + metric.update(img) + + mean, std = metric.compute() + assert mean >= 0.0 + assert std >= 0.0 + + +class _ImgDataset(Dataset): + def __init__(self, imgs): + self.imgs = imgs + + def __getitem__(self, idx): + return self.imgs[idx] + + def __len__(self): + return self.imgs.shape[0] + + +@pytest.mark.skipif(not B.cuda.is_available(), reason="test is too slow without gpu") +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_compare_is(tmpdir): + """check that the hole pipeline give the same result as torch-fidelity.""" + from torch_fidelity import calculate_metrics + + metric = IS(splits=1).cuda() + + # Generate some synthetic data + img1 = B.randint(0, 255, (100, 3, 299, 299), dtype=B.uint8) + + batch_size = 10 + for i in range(img1.shape[0] // batch_size): + metric.update(img1[batch_size * i : batch_size * (i + 1)].cuda()) + + torch_fid = calculate_metrics( + input1=_ImgDataset(img1), isc=True, isc_splits=1, batch_size=batch_size, save_cpu_ram=True + ) + + tm_mean, _ = metric.compute() + + assert B.allclose(tm_mean.cpu(), B.tensor([torch_fid["inception_score_mean"]]), atol=1e-3) diff --git a/RE/paddlemetric/src/tests/image/test_kid.py b/RE/paddlemetric/src/tests/image/test_kid.py new file mode 100644 index 00000000..586c5f04 --- /dev/null +++ b/RE/paddlemetric/src/tests/image/test_kid.py @@ -0,0 +1,166 @@ +# Copyright The PyTorch Lightning team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pickle + +import pytest +import paddleext.torchapi as B +from B.utils.data import Dataset + +from paddlemetrics.image.kid import KID +from paddlemetrics.utilities.imports import _TORCH_FIDELITY_AVAILABLE + +B.manual_seed(42) + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_no_train(): + """Assert that metric never leaves evaluation mode.""" + + class MyModel(B.nn.Module): + def __init__(self): + super().__init__() + self.metric = KID() + + def forward(self, x): + return x + + model = MyModel() + model.train() + assert model.training + assert not model.metric.inception.training, "FID metric was changed to training mode which should not happen" + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_kid_pickle(): + """Assert that we can initialize the metric and pickle it.""" + metric = KID() + assert metric + + # verify metrics work after being loaded from pickled state + pickled_metric = pickle.dumps(metric) + metric = pickle.loads(pickled_metric) + + +def test_kid_raises_errors_and_warnings(): + """Test that expected warnings and errors are raised.""" + with pytest.warns( + UserWarning, + match="Metric `KID` will save all extracted features in buffer." + " For large datasets this may lead to large memory footprint.", + ): + KID() + + if _TORCH_FIDELITY_AVAILABLE: + with pytest.raises(ValueError, match="Integer input to argument `feature` must be one of .*"): + KID(feature=2) + else: + with pytest.raises( + ValueError, + match="KID metric requires that Torch-fidelity is installed." 
+ "Either install as `pip install paddlemetrics[image]`" + " or `pip install torch-fidelity`", + ): + KID() + + with pytest.raises(TypeError, match="Got unknown input to argument `feature`"): + KID(feature=[1, 2]) + + with pytest.raises(ValueError, match="Argument `subset_size` should be smaller than the number of samples"): + m = KID() + m.update(B.randint(0, 255, (5, 3, 299, 299), dtype=B.uint8), real=True) + m.update(B.randint(0, 255, (5, 3, 299, 299), dtype=B.uint8), real=False) + m.compute() + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_kid_extra_parameters(): + with pytest.raises(ValueError, match="Argument `subsets` expected to be integer larger than 0"): + KID(subsets=-1) + + with pytest.raises(ValueError, match="Argument `subset_size` expected to be integer larger than 0"): + KID(subset_size=-1) + + with pytest.raises(ValueError, match="Argument `degree` expected to be integer larger than 0"): + KID(degree=-1) + + with pytest.raises(ValueError, match="Argument `gamma` expected to be `None` or float larger than 0"): + KID(gamma=-1) + + with pytest.raises(ValueError, match="Argument `coef` expected to be float larger than 0"): + KID(coef=-1) + + +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +@pytest.mark.parametrize("feature", [64, 192, 768, 2048]) +def test_kid_same_input(feature): + """test that the metric works.""" + metric = KID(feature=feature, subsets=5, subset_size=2) + + for _ in range(2): + img = B.randint(0, 255, (10, 3, 299, 299), dtype=B.uint8) + metric.update(img, real=True) + metric.update(img, real=False) + + assert B.allclose(B.cat(metric.real_features, dim=0), B.cat(metric.fake_features, dim=0)) + + mean, std = metric.compute() + assert mean != 0.0 + assert std >= 0.0 + + +class _ImgDataset(Dataset): + def __init__(self, imgs): + self.imgs = imgs + + def __getitem__(self, idx): + return self.imgs[idx] + + def __len__(self): + return self.imgs.shape[0] + + +@pytest.mark.skipif(not B.cuda.is_available(), reason="test is too slow without gpu") +@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity") +def test_compare_kid(tmpdir, feature=2048): + """check that the hole pipeline give the same result as torch-fidelity.""" + from torch_fidelity import calculate_metrics + + metric = KID(feature=feature, subsets=1, subset_size=100).cuda() + + # Generate some synthetic data + img1 = B.randint(0, 180, (100, 3, 299, 299), dtype=B.uint8) + img2 = B.randint(100, 255, (100, 3, 299, 299), dtype=B.uint8) + + batch_size = 10 + for i in range(img1.shape[0] // batch_size): + metric.update(img1[batch_size * i : batch_size * (i + 1)].cuda(), real=True) + + for i in range(img2.shape[0] // batch_size): + metric.update(img2[batch_size * i : batch_size * (i + 1)].cuda(), real=False) + + torch_fid = calculate_metrics( + input1=_ImgDataset(img1), + input2=_ImgDataset(img2), + kid=True, + feature_layer_fid=str(feature), + batch_size=batch_size, + kid_subsets=1, + kid_subset_size=100, + save_cpu_ram=True, + ) + + tm_mean, tm_std = metric.compute() + + assert B.allclose(tm_mean.cpu(), B.tensor([torch_fid["kernel_inception_distance_mean"]]), atol=1e-3) + assert B.allclose(tm_std.cpu(), B.tensor([torch_fid["kernel_inception_distance_std"]]), atol=1e-3) diff --git a/RE/paddlemetric/src/tests/image/test_lpips.py b/RE/paddlemetric/src/tests/image/test_lpips.py new file mode 100644 index 00000000..fd3e83a1 --- /dev/null +++ 
b/RE/paddlemetric/src/tests/image/test_lpips.py @@ -0,0 +1,103 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple +from functools import partial + +import pytest +import paddleext.torchapi as B +from lpips import LPIPS as reference_LPIPS +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.image.lpip_similarity import LPIPS +from paddlemetrics.utilities.imports import _LPIPS_AVAILABLE + +seed_all(42) + +Input = namedtuple("Input", ["img1", "img2"]) + +_inputs = Input( + img1=B.rand(int(NUM_BATCHES * 0.4), int(BATCH_SIZE / 16), 3, 100, 100), + img2=B.rand(int(NUM_BATCHES * 0.4), int(BATCH_SIZE / 16), 3, 100, 100), +) + + +def _compare_fn(img1: Tensor, img2: Tensor, net_type: str, reduction: str = "mean") -> Tensor: + """comparison function for tm implementation.""" + ref = reference_LPIPS(net=net_type) + res = ref(img1, img2).detach().cpu().numpy() + if reduction == "mean": + return res.mean() + return res.sum() + + +@pytest.mark.skipif(not _LPIPS_AVAILABLE, reason="test requires that lpips is installed") +@pytest.mark.parametrize("net_type", ["vgg", "alex", "squeeze"]) +class TestLPIPS(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + def test_lpips(self, net_type, ddp): + """test modular implementation for correctness.""" + self.run_class_metric_test( + ddp=ddp, + preds=_inputs.img1, + target=_inputs.img2, + metric_class=LPIPS, + sk_metric=partial(_compare_fn, net_type=net_type), + dist_sync_on_step=False, + check_scriptable=False, + metric_args={"net_type": net_type}, + ) + + def test_lpips_differentiability(self, net_type): + """test for differentiability of LPIPS metric.""" + self.run_differentiability_test(preds=_inputs.img1, target=_inputs.img2, metric_module=LPIPS) + + # LPIPS half + cpu does not work due to missing support in B.min + @pytest.mark.xfail(reason="PearsonCorrcoef metric does not support cpu + half precision") + def test_lpips_half_cpu(self, net_type): + """test for half + cpu support.""" + self.run_precision_test_cpu(_inputs.img1, _inputs.img2, LPIPS) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_lpips_half_gpu(self, net_type): + """test for half + gpu support.""" + self.run_precision_test_gpu(_inputs.img1, _inputs.img2, LPIPS) + + +@pytest.mark.skipif(not _LPIPS_AVAILABLE, reason="test requires that lpips is installed") +def test_error_on_wrong_init(): + """Test class raises the expected errors.""" + with pytest.raises(ValueError, match="Argument `net_type` must be one .*"): + LPIPS(net_type="resnet") + + with pytest.raises(ValueError, match="Argument `reduction` must be one .*"): + LPIPS(reduction=None) + + +@pytest.mark.skipif(not _LPIPS_AVAILABLE, reason="test requires that lpips is installed") +@pytest.mark.parametrize( + "inp1, inp2", + [ + (B.rand(1, 1, 28, 28), B.rand(1, 3, 28, 28)), # wrong number of channels + 
(B.rand(1, 3, 28, 28), B.rand(1, 1, 28, 28)), # wrong number of channels + (B.randn(1, 3, 28, 28), B.rand(1, 3, 28, 28)), # non-normalized input + (B.rand(1, 3, 28, 28), B.randn(1, 3, 28, 28)), # non-normalized input + ], +) +def test_error_on_wrong_update(inp1, inp2): + """test error is raised on wrong input to update method.""" + metric = LPIPS() + with pytest.raises(ValueError, match="Expected both input arguments to be normalized tensors .*"): + metric(inp1, inp2) diff --git a/RE/paddlemetric/src/tests/image/test_psnr.py b/RE/paddlemetric/src/tests/image/test_psnr.py new file mode 100644 index 00000000..cc8b857a --- /dev/null +++ b/RE/paddlemetric/src/tests/image/test_psnr.py @@ -0,0 +1,149 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import namedtuple +from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from skimage.metrics import peak_signal_noise_ratio + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional import psnr +from paddlemetrics.image import PSNR + +seed_all(42) + +Input = namedtuple("Input", ["preds", "target"]) + +_input_size = (NUM_BATCHES, BATCH_SIZE, 32, 32) +_inputs = [ + Input( + preds=B.randint(n_cls_pred, _input_size, dtype=B.float), + target=B.randint(n_cls_target, _input_size, dtype=B.float), + ) + for n_cls_pred, n_cls_target in [(10, 10), (5, 10), (10, 5)] +] + + +def _to_sk_peak_signal_noise_ratio_inputs(value, dim): + value = value.numpy() + batches = value[None] if value.ndim == len(_input_size) - 1 else value + + if dim is None: + return [batches] + + num_dims = np.size(dim) + if not num_dims: + return batches + + inputs = [] + for batch in batches: + batch = np.moveaxis(batch, dim, np.arange(-num_dims, 0)) + psnr_input_shape = batch.shape[-num_dims:] + inputs.extend(batch.reshape(-1, *psnr_input_shape)) + return inputs + + +def _sk_psnr(preds, target, data_range, reduction, dim): + sk_preds_lists = _to_sk_peak_signal_noise_ratio_inputs(preds, dim=dim) + sk_target_lists = _to_sk_peak_signal_noise_ratio_inputs(target, dim=dim) + np_reduce_map = {"elementwise_mean": np.mean, "none": np.array, "sum": np.sum} + return np_reduce_map[reduction]( + [ + peak_signal_noise_ratio(sk_target, sk_preds, data_range=data_range) + for sk_target, sk_preds in zip(sk_target_lists, sk_preds_lists) + ] + ) + + +def _base_e_sk_psnr(preds, target, data_range, reduction, dim): + return _sk_psnr(preds, target, data_range, reduction, dim) * np.log(10) + + +@pytest.mark.parametrize( + "preds, target, data_range, reduction, dim", + [ + (_inputs[0].preds, _inputs[0].target, 10, "elementwise_mean", None), + (_inputs[1].preds, _inputs[1].target, 10, "elementwise_mean", None), + (_inputs[2].preds, _inputs[2].target, 5, "elementwise_mean", None), + (_inputs[2].preds, _inputs[2].target, 5, "elementwise_mean", 1), + (_inputs[2].preds, _inputs[2].target, 5, "elementwise_mean", (1, 2)), + 
(_inputs[2].preds, _inputs[2].target, 5, "sum", (1, 2)), + ], +) +@pytest.mark.parametrize( + "base, sk_metric", + [ + (10.0, _sk_psnr), + (2.718281828459045, _base_e_sk_psnr), + ], +) +class TestPSNR(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_psnr(self, preds, target, data_range, base, reduction, dim, sk_metric, ddp, dist_sync_on_step): + _args = {"data_range": data_range, "base": base, "reduction": reduction, "dim": dim} + self.run_class_metric_test( + ddp, + preds, + target, + PSNR, + partial(sk_metric, data_range=data_range, reduction=reduction, dim=dim), + metric_args=_args, + dist_sync_on_step=dist_sync_on_step, + ) + + def test_psnr_functional(self, preds, target, sk_metric, data_range, base, reduction, dim): + _args = {"data_range": data_range, "base": base, "reduction": reduction, "dim": dim} + self.run_functional_metric_test( + preds, + target, + psnr, + partial(sk_metric, data_range=data_range, reduction=reduction, dim=dim), + metric_args=_args, + ) + + # PSNR half + cpu does not work due to missing support in B.log + @pytest.mark.xfail(reason="PSNR metric does not support cpu + half precision") + def test_psnr_half_cpu(self, preds, target, data_range, reduction, dim, base, sk_metric): + self.run_precision_test_cpu( + preds, target, PSNR, psnr, {"data_range": data_range, "base": base, "reduction": reduction, "dim": dim} + ) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_psnr_half_gpu(self, preds, target, data_range, reduction, dim, base, sk_metric): + self.run_precision_test_gpu( + preds, target, PSNR, psnr, {"data_range": data_range, "base": base, "reduction": reduction, "dim": dim} + ) + + +@pytest.mark.parametrize("reduction", ["none", "sum"]) +def test_reduction_for_dim_none(reduction): + match = f"The `reduction={reduction}` will not have any effect when `dim` is None." + with pytest.warns(UserWarning, match=match): + PSNR(reduction=reduction, dim=None) + + with pytest.warns(UserWarning, match=match): + psnr(_inputs[0].preds, _inputs[0].target, reduction=reduction, dim=None) + + +def test_missing_data_range(): + with pytest.raises(ValueError): + PSNR(data_range=None, dim=0) + + with pytest.raises(ValueError): + psnr(_inputs[0].preds, _inputs[0].target, data_range=None, dim=0) diff --git a/RE/paddlemetric/src/tests/image/test_ssim.py b/RE/paddlemetric/src/tests/image/test_ssim.py new file mode 100644 index 00000000..d249db0d --- /dev/null +++ b/RE/paddlemetric/src/tests/image/test_ssim.py @@ -0,0 +1,167 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
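Both the PSNR tests above and the SSIM tests below pin the paddlemetrics result to the skimage reference implementation on identical data. A self-contained version of that cross-check for PSNR looks roughly like this; it assumes `scikit-image` and `paddlemetrics` are installed, and the tolerance is only illustrative:

```python
import numpy as np
import paddleext.torchapi as B
from skimage.metrics import peak_signal_noise_ratio
from paddlemetrics.functional import psnr

preds = B.rand(8, 3, 32, 32)
target = B.rand(8, 3, 32, 32)

# paddlemetrics result vs. the skimage reference on the same data
tm_value = psnr(preds, target, data_range=1.0)
sk_value = peak_signal_noise_ratio(target.numpy(), preds.numpy(), data_range=1.0)
assert np.isclose(tm_value.item(), sk_value, atol=1e-4)
```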
+from collections import namedtuple +from functools import partial + +import pytest +import paddleext.torchapi as B +from skimage.metrics import structural_similarity + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional import ssim +from paddlemetrics.image import SSIM + +seed_all(42) + +Input = namedtuple("Input", ["preds", "target", "multichannel"]) + +_inputs = [] +for size, channel, coef, multichannel, dtype in [ + (12, 3, 0.9, True, B.float), + (13, 1, 0.8, False, B.float32), + (14, 1, 0.7, False, B.double), + (15, 3, 0.6, True, B.float64), +]: + preds = B.rand(NUM_BATCHES, BATCH_SIZE, channel, size, size, dtype=dtype) + _inputs.append( + Input( + preds=preds, + target=preds * coef, + multichannel=multichannel, + ) + ) + + +def _sk_ssim(preds, target, data_range, multichannel, kernel_size): + c, h, w = preds.shape[-3:] + sk_preds = preds.view(-1, c, h, w).permute(0, 2, 3, 1).numpy() + sk_target = target.view(-1, c, h, w).permute(0, 2, 3, 1).numpy() + if not multichannel: + sk_preds = sk_preds[:, :, :, 0] + sk_target = sk_target[:, :, :, 0] + + return structural_similarity( + sk_target, + sk_preds, + data_range=data_range, + multichannel=multichannel, + gaussian_weights=True, + win_size=kernel_size, + sigma=1.5, + use_sample_covariance=False, + ) + + +@pytest.mark.parametrize( + "preds, target, multichannel", + [(i.preds, i.target, i.multichannel) for i in _inputs], +) +@pytest.mark.parametrize("kernel_size", [5, 11]) +class TestSSIM(MetricTester): + atol = 6e-3 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_ssim(self, preds, target, multichannel, kernel_size, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + SSIM, + partial(_sk_ssim, data_range=1.0, multichannel=multichannel, kernel_size=kernel_size), + metric_args={"data_range": 1.0, "kernel_size": (kernel_size, kernel_size)}, + dist_sync_on_step=dist_sync_on_step, + ) + + def test_ssim_functional(self, preds, target, multichannel, kernel_size): + self.run_functional_metric_test( + preds, + target, + ssim, + partial(_sk_ssim, data_range=1.0, multichannel=multichannel, kernel_size=kernel_size), + metric_args={"data_range": 1.0, "kernel_size": (kernel_size, kernel_size)}, + ) + + # SSIM half + cpu does not work due to missing support in B.log + @pytest.mark.xfail(reason="SSIM metric does not support cpu + half precision") + def test_ssim_half_cpu(self, preds, target, multichannel, kernel_size): + self.run_precision_test_cpu(preds, target, SSIM, ssim, {"data_range": 1.0}) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_ssim_half_gpu(self, preds, target, multichannel, kernel_size): + self.run_precision_test_gpu(preds, target, SSIM, ssim, {"data_range": 1.0}) + + +@pytest.mark.parametrize( + ["pred", "target", "kernel", "sigma"], + [ + pytest.param([1, 16, 16], [1, 16, 16], [11, 11], [1.5, 1.5]), # len(shape) + pytest.param([1, 1, 16, 16], [1, 1, 16, 16], [11, 11], [1.5]), # len(kernel), len(sigma) + pytest.param([1, 1, 16, 16], [1, 1, 16, 16], [11], [1.5, 1.5]), # len(kernel), len(sigma) + pytest.param([1, 1, 16, 16], [1, 1, 16, 16], [11], [1.5]), # len(kernel), len(sigma) + pytest.param([1, 1, 16, 16], [1, 1, 16, 16], [11, 0], [1.5, 1.5]), # invalid kernel input + pytest.param([1, 1, 16, 16], [1, 1, 16, 16], [11, 10], [1.5, 1.5]), # invalid kernel input + pytest.param([1, 1, 16, 16], [1, 1, 16, 
16], [11, -11], [1.5, 1.5]), # invalid kernel input + pytest.param([1, 1, 16, 16], [1, 1, 16, 16], [11, 11], [1.5, 0]), # invalid sigma input + pytest.param([1, 1, 16, 16], [1, 1, 16, 16], [11, 0], [1.5, -1.5]), # invalid sigma input + ], +) +def test_ssim_invalid_inputs(pred, target, kernel, sigma): + pred_t = B.rand(pred) + target_t = B.rand(target, dtype=B.float64) + with pytest.raises(TypeError): + ssim(pred_t, target_t) + + pred = B.rand(pred) + target = B.rand(target) + with pytest.raises(ValueError): + ssim(pred, target, kernel, sigma) + + +def test_ssim_unequal_kernel_size(): + """Test the case where kernel_size[0] != kernel_size[1]""" + preds = B.tensor( + [ + [ + [ + [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], + [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], + [0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0], + [0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], + [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], + ] + ] + ] + ) + target = B.tensor( + [ + [ + [ + [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], + [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0], + [1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], + [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], + [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], + [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0], + [0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0], + ] + ] + ] + ) + # kernel order matters + assert ssim(preds, target, kernel_size=(3, 5)) == B.tensor(0.10814697) + assert ssim(preds, target, kernel_size=(5, 3)) != B.tensor(0.10814697) diff --git a/RE/paddlemetric/src/tests/pairwise/__init__.py b/RE/paddlemetric/src/tests/pairwise/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/pairwise/test_pairwise_distance.py b/RE/paddlemetric/src/tests/pairwise/test_pairwise_distance.py new file mode 100644 index 00000000..5a642a60 --- /dev/null +++ b/RE/paddlemetric/src/tests/pairwise/test_pairwise_distance.py @@ -0,0 +1,121 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
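The pairwise tests below compare each paddlemetrics functional against its sklearn counterpart after flattening the batch dimension. A standalone version of that comparison for cosine similarity, assuming `scikit-learn` and `paddlemetrics` are installed (shapes are arbitrary):

```python
import numpy as np
import paddleext.torchapi as B
from sklearn.metrics.pairwise import cosine_similarity
from paddlemetrics.functional import pairwise_cosine_similarity

x = B.rand(10, 5)
y = B.rand(12, 5)

tm_res = pairwise_cosine_similarity(x, y)          # pairwise matrix, shape [10, 12]
sk_res = cosine_similarity(x.numpy(), y.numpy())   # same matrix from sklearn
assert np.allclose(tm_res.numpy(), sk_res, atol=1e-4)
```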
+from collections import namedtuple +from functools import partial + +import pytest +import paddleext.torchapi as B +from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances, linear_kernel, manhattan_distances + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional import ( + pairwise_cosine_similarity, + pairwise_euclidean_distance, + pairwise_linear_similarity, + pairwise_manhatten_distance, +) +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_7 + +seed_all(42) + +extra_dim = 5 + +Input = namedtuple("Input", ["x", "y"]) + + +_inputs1 = Input( + x=B.rand(NUM_BATCHES, BATCH_SIZE, extra_dim), + y=B.rand(NUM_BATCHES, BATCH_SIZE, extra_dim), +) + + +_inputs2 = Input( + x=B.rand(NUM_BATCHES, BATCH_SIZE, extra_dim), + y=B.rand(NUM_BATCHES, BATCH_SIZE, extra_dim), +) + + +def _sk_metric(x, y, sk_fn, reduction): + """comparison function.""" + x = x.view(-1, extra_dim).numpy() + y = y.view(-1, extra_dim).numpy() + res = sk_fn(x, y) + if reduction == "sum": + return res.sum(axis=-1) + elif reduction == "mean": + return res.mean(axis=-1) + return res + + +@pytest.mark.parametrize( + "x, y", + [ + (_inputs1.x, _inputs1.y), + (_inputs2.x, _inputs2.y), + ], +) +@pytest.mark.parametrize( + "metric_functional, sk_fn", + [ + (pairwise_cosine_similarity, cosine_similarity), + (pairwise_euclidean_distance, euclidean_distances), + (pairwise_manhatten_distance, manhattan_distances), + (pairwise_linear_similarity, linear_kernel), + ], +) +@pytest.mark.parametrize("reduction", ["sum", "mean", None]) +class TestPairwise(MetricTester): + """test pairwise implementations.""" + + atol = 1e-4 + + def test_pairwise_functional(self, x, y, metric_functional, sk_fn, reduction): + """test functional pairwise implementations.""" + self.run_functional_metric_test( + preds=x, + target=y, + metric_functional=metric_functional, + sk_metric=partial(_sk_metric, sk_fn=sk_fn, reduction=reduction), + metric_args={"reduction": reduction}, + ) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_7, reason="half support of core operations on not support before pytorch v1.7" + ) + def test_pairwise_half_cpu(self, x, y, metric_functional, sk_fn, reduction): + """test half precision support on cpu.""" + if metric_functional == pairwise_euclidean_distance: + pytest.xfail("pairwise_euclidean_distance metric does not support cpu + half precision") + self.run_precision_test_cpu(x, y, None, metric_functional, metric_args={"reduction": reduction}) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_pairwise_half_gpu(self, x, y, metric_functional, sk_fn, reduction): + """test half precision support on gpu.""" + self.run_precision_test_gpu(x, y, None, metric_functional, metric_args={"reduction": reduction}) + + +@pytest.mark.parametrize( + "metric", [pairwise_cosine_similarity, pairwise_euclidean_distance, pairwise_manhatten_distance] +) +def test_error_on_wrong_shapes(metric): + """Test errors are raised on wrong input.""" + with pytest.raises(ValueError, match="Expected argument `x` to be a 2D tensor .*"): + metric(B.randn(10)) + + with pytest.raises(ValueError, match="Expected argument `y` to be a 2D tensor .*"): + metric(B.randn(10, 5), B.randn(5, 3)) + + with pytest.raises(ValueError, match="Expected reduction to be one of .*"): + metric(B.randn(10, 5), B.randn(10, 5), reduction=1) diff --git a/RE/paddlemetric/src/tests/regression/__init__.py 
b/RE/paddlemetric/src/tests/regression/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/regression/test_cosine_similarity.py b/RE/paddlemetric/src/tests/regression/test_cosine_similarity.py new file mode 100644 index 00000000..0821cc11 --- /dev/null +++ b/RE/paddlemetric/src/tests/regression/test_cosine_similarity.py @@ -0,0 +1,111 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple +from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics.pairwise import cosine_similarity as sk_cosine + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional.regression.cosine_similarity import cosine_similarity +from paddlemetrics.regression.cosine_similarity import CosineSimilarity + +seed_all(42) + +num_targets = 5 + +Input = namedtuple("Input", ["preds", "target"]) + +_single_target_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.rand(NUM_BATCHES, BATCH_SIZE), +) + +_multi_target_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), + target=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), +) + + +def _multi_target_sk_metric(preds, target, reduction, sk_fn=sk_cosine): + sk_preds = preds.view(-1, num_targets).numpy() + sk_target = target.view(-1, num_targets).numpy() + result_array = sk_fn(sk_target, sk_preds) + col = np.diagonal(result_array) + col_sum = col.sum() + if reduction == "sum": + to_return = col_sum + elif reduction == "mean": + mean = col_sum / len(col) + to_return = mean + else: + to_return = col + return to_return + + +def _single_target_sk_metric(preds, target, reduction, sk_fn=sk_cosine): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + result_array = sk_fn(np.expand_dims(sk_preds, axis=0), np.expand_dims(sk_target, axis=0)) + col = np.diagonal(result_array) + col_sum = col.sum() + if reduction == "sum": + to_return = col_sum + elif reduction == "mean": + mean = col_sum / len(col) + to_return = mean + else: + to_return = col + return to_return + + +@pytest.mark.parametrize("reduction", ["sum", "mean"]) +@pytest.mark.parametrize( + "preds, target, sk_metric", + [ + (_single_target_inputs.preds, _single_target_inputs.target, _single_target_sk_metric), + (_multi_target_inputs.preds, _multi_target_inputs.target, _multi_target_sk_metric), + ], +) +class TestCosineSimilarity(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_cosine_similarity(self, reduction, preds, target, sk_metric, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + CosineSimilarity, + partial(sk_metric, reduction=reduction), + dist_sync_on_step, + metric_args=dict(reduction=reduction), + ) + + def test_cosine_similarity_functional(self, reduction, preds, target, sk_metric): + 
self.run_functional_metric_test( + preds, + target, + cosine_similarity, + partial(sk_metric, reduction=reduction), + metric_args=dict(reduction=reduction), + ) + + +def test_error_on_different_shape(metric_class=CosineSimilarity): + metric = metric_class() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) diff --git a/RE/paddlemetric/src/tests/regression/test_explained_variance.py b/RE/paddlemetric/src/tests/regression/test_explained_variance.py new file mode 100644 index 00000000..a227d0d4 --- /dev/null +++ b/RE/paddlemetric/src/tests/regression/test_explained_variance.py @@ -0,0 +1,110 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple +from functools import partial + +import pytest +import paddleext.torchapi as B +from sklearn.metrics import explained_variance_score + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional import explained_variance +from paddlemetrics.regression import ExplainedVariance +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_6 + +seed_all(42) + +num_targets = 5 + +Input = namedtuple("Input", ["preds", "target"]) + +_single_target_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.rand(NUM_BATCHES, BATCH_SIZE), +) + +_multi_target_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), + target=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), +) + + +def _single_target_sk_metric(preds, target, sk_fn=explained_variance_score): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + return sk_fn(sk_target, sk_preds) + + +def _multi_target_sk_metric(preds, target, sk_fn=explained_variance_score): + sk_preds = preds.view(-1, num_targets).numpy() + sk_target = target.view(-1, num_targets).numpy() + return sk_fn(sk_target, sk_preds) + + +@pytest.mark.parametrize("multioutput", ["raw_values", "uniform_average", "variance_weighted"]) +@pytest.mark.parametrize( + "preds, target, sk_metric", + [ + (_single_target_inputs.preds, _single_target_inputs.target, _single_target_sk_metric), + (_multi_target_inputs.preds, _multi_target_inputs.target, _multi_target_sk_metric), + ], +) +class TestExplainedVariance(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_explained_variance(self, multioutput, preds, target, sk_metric, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + ExplainedVariance, + partial(sk_metric, sk_fn=partial(explained_variance_score, multioutput=multioutput)), + dist_sync_on_step, + metric_args=dict(multioutput=multioutput), + ) + + def test_explained_variance_functional(self, multioutput, preds, target, sk_metric): + self.run_functional_metric_test( + preds, + target, + explained_variance, + partial(sk_metric, 
sk_fn=partial(explained_variance_score, multioutput=multioutput)), + metric_args=dict(multioutput=multioutput), + ) + + def test_explained_variance_differentiability(self, multioutput, preds, target, sk_metric): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=ExplainedVariance, + metric_functional=explained_variance, + metric_args={"multioutput": multioutput}, + ) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_6, reason="half support of core operations on not support before pytorch v1.6" + ) + def test_explained_variance_half_cpu(self, multioutput, preds, target, sk_metric): + self.run_precision_test_cpu(preds, target, ExplainedVariance, explained_variance) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_explained_variance_half_gpu(self, multioutput, preds, target, sk_metric): + self.run_precision_test_gpu(preds, target, ExplainedVariance, explained_variance) + + +def test_error_on_different_shape(metric_class=ExplainedVariance): + metric = metric_class() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) diff --git a/RE/paddlemetric/src/tests/regression/test_mean_error.py b/RE/paddlemetric/src/tests/regression/test_mean_error.py new file mode 100644 index 00000000..b9d9a31e --- /dev/null +++ b/RE/paddlemetric/src/tests/regression/test_mean_error.py @@ -0,0 +1,177 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
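+# --- Editorial sketch (not part of the original suite) -----------------------
+# The sklearn reference functions above all follow the same pattern: flatten the
+# (NUM_BATCHES, BATCH_SIZE, ...) tensors, convert to numpy, and delegate to the
+# sklearn counterpart, because sklearn metrics have no notion of batching. The
+# helper below is a hypothetical, self-contained illustration of that pattern
+# using plain numpy instead of the B backend shim.
+def _example_flattened_sklearn_reference():
+    import numpy as np
+    from sklearn.metrics import explained_variance_score
+    preds = np.random.rand(4, 8)    # stands in for B.rand(NUM_BATCHES, BATCH_SIZE)
+    target = np.random.rand(4, 8)
+    # flatten batches into one long vector before calling sklearn
+    return explained_variance_score(target.reshape(-1), preds.reshape(-1))
+# -----------------------------------------------------------------------------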
+import math +from collections import namedtuple +from functools import partial + +import pytest +import paddleext.torchapi as B +from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error +from sklearn.metrics import mean_absolute_percentage_error as sk_mean_abs_percentage_error +from sklearn.metrics import mean_squared_error as sk_mean_squared_error +from sklearn.metrics import mean_squared_log_error as sk_mean_squared_log_error + +from tests.helpers import seed_all +from tests.helpers.non_sklearn_metrics import ( + symmetric_mean_absolute_percentage_error as sk_sym_mean_abs_percentage_error, +) +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional import ( + mean_absolute_error, + mean_absolute_percentage_error, + mean_squared_error, + mean_squared_log_error, +) +from paddlemetrics.functional.regression.symmetric_mean_absolute_percentage_error import ( + symmetric_mean_absolute_percentage_error, +) +from paddlemetrics.regression import ( + MeanAbsoluteError, + MeanAbsolutePercentageError, + MeanSquaredError, + MeanSquaredLogError, +) +from paddlemetrics.regression.symmetric_mean_absolute_percentage_error import SymmetricMeanAbsolutePercentageError +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_6 + +seed_all(42) + +num_targets = 5 + +Input = namedtuple("Input", ["preds", "target"]) + +_single_target_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.rand(NUM_BATCHES, BATCH_SIZE), +) + +_multi_target_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), + target=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), +) + + +def _single_target_sk_metric(preds, target, sk_fn, metric_args): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + + # `sk_target` and `sk_preds` switched to fix failing tests. + # For more info, check https://github.com/PyTorchLightning/metrics/pull/248#issuecomment-841232277 + res = sk_fn(sk_target, sk_preds) + + return math.sqrt(res) if (metric_args and not metric_args["squared"]) else res + + +def _multi_target_sk_metric(preds, target, sk_fn, metric_args): + sk_preds = preds.view(-1, num_targets).numpy() + sk_target = target.view(-1, num_targets).numpy() + + # `sk_target` and `sk_preds` switched to fix failing tests. 
+ # For more info, check https://github.com/PyTorchLightning/metrics/pull/248#issuecomment-841232277 + res = sk_fn(sk_target, sk_preds) + + return math.sqrt(res) if (metric_args and not metric_args["squared"]) else res + + +@pytest.mark.parametrize( + "preds, target, sk_metric", + [ + (_single_target_inputs.preds, _single_target_inputs.target, _single_target_sk_metric), + (_multi_target_inputs.preds, _multi_target_inputs.target, _multi_target_sk_metric), + ], +) +@pytest.mark.parametrize( + "metric_class, metric_functional, sk_fn, metric_args", + [ + (MeanSquaredError, mean_squared_error, sk_mean_squared_error, {"squared": True}), + (MeanSquaredError, mean_squared_error, sk_mean_squared_error, {"squared": False}), + (MeanAbsoluteError, mean_absolute_error, sk_mean_absolute_error, {}), + (MeanAbsolutePercentageError, mean_absolute_percentage_error, sk_mean_abs_percentage_error, {}), + ( + SymmetricMeanAbsolutePercentageError, + symmetric_mean_absolute_percentage_error, + sk_sym_mean_abs_percentage_error, + {}, + ), + (MeanSquaredLogError, mean_squared_log_error, sk_mean_squared_log_error, {}), + ], +) +class TestMeanError(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_mean_error_class( + self, preds, target, sk_metric, metric_class, metric_functional, sk_fn, metric_args, ddp, dist_sync_on_step + ): + # todo: `metric_functional` is unused + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=metric_class, + sk_metric=partial(sk_metric, sk_fn=sk_fn, metric_args=metric_args), + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + ) + + def test_mean_error_functional(self, preds, target, sk_metric, metric_class, metric_functional, sk_fn, metric_args): + # todo: `metric_class` is unused + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=metric_functional, + sk_metric=partial(sk_metric, sk_fn=sk_fn, metric_args=metric_args), + metric_args=metric_args, + ) + + def test_mean_error_differentiability( + self, preds, target, sk_metric, metric_class, metric_functional, sk_fn, metric_args + ): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=metric_class, + metric_functional=metric_functional, + metric_args=metric_args, + ) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_6, reason="half support of core operations on not support before pytorch v1.6" + ) + def test_mean_error_half_cpu(self, preds, target, sk_metric, metric_class, metric_functional, sk_fn, metric_args): + if metric_class == MeanSquaredLogError: + # MeanSquaredLogError half + cpu does not work due to missing support in B.log + pytest.xfail("MeanSquaredLogError metric does not support cpu + half precision") + + if metric_class == MeanAbsolutePercentageError: + # MeanSquaredPercentageError half + cpu does not work due to missing support in B.log + pytest.xfail("MeanSquaredPercentageError metric does not support cpu + half precision") + + if metric_class == SymmetricMeanAbsolutePercentageError: + # MeanSquaredPercentageError half + cpu does not work due to missing support in B.log + pytest.xfail("SymmetricMeanAbsolutePercentageError metric does not support cpu + half precision") + + self.run_precision_test_cpu(preds, target, metric_class, metric_functional) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_mean_error_half_gpu(self, preds, target, sk_metric, metric_class, metric_functional, 
sk_fn, metric_args): + self.run_precision_test_gpu(preds, target, metric_class, metric_functional) + + +@pytest.mark.parametrize( + "metric_class", [MeanSquaredError, MeanAbsoluteError, MeanSquaredLogError, MeanAbsolutePercentageError] +) +def test_error_on_different_shape(metric_class): + metric = metric_class() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) diff --git a/RE/paddlemetric/src/tests/regression/test_pearson.py b/RE/paddlemetric/src/tests/regression/test_pearson.py new file mode 100644 index 00000000..09e1ac21 --- /dev/null +++ b/RE/paddlemetric/src/tests/regression/test_pearson.py @@ -0,0 +1,93 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple + +import pytest +import paddleext.torchapi as B +from scipy.stats import pearsonr + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional.regression.pearson import pearson_corrcoef +from paddlemetrics.regression.pearson import PearsonCorrcoef + +seed_all(42) + +Input = namedtuple("Input", ["preds", "target"]) + +_single_target_inputs1 = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.rand(NUM_BATCHES, BATCH_SIZE), +) + +_single_target_inputs2 = Input( + preds=B.randn(NUM_BATCHES, BATCH_SIZE), + target=B.randn(NUM_BATCHES, BATCH_SIZE), +) + + +def _sk_pearsonr(preds, target): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + return pearsonr(sk_target, sk_preds)[0] + + +@pytest.mark.parametrize( + "preds, target", + [ + (_single_target_inputs1.preds, _single_target_inputs1.target), + (_single_target_inputs2.preds, _single_target_inputs2.target), + ], +) +class TestPearsonCorrcoef(MetricTester): + atol = 1e-2 + + @pytest.mark.parametrize("ddp", [True, False]) + def test_pearson_corrcoef(self, preds, target, ddp): + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=PearsonCorrcoef, + sk_metric=_sk_pearsonr, + dist_sync_on_step=False, + ) + + def test_pearson_corrcoef_functional(self, preds, target): + self.run_functional_metric_test( + preds=preds, target=target, metric_functional=pearson_corrcoef, sk_metric=_sk_pearsonr + ) + + def test_pearson_corrcoef_differentiability(self, preds, target): + self.run_differentiability_test( + preds=preds, target=target, metric_module=PearsonCorrcoef, metric_functional=pearson_corrcoef + ) + + # Pearson half + cpu does not work due to missing support in B.sqrt + @pytest.mark.xfail(reason="PearsonCorrcoef metric does not support cpu + half precision") + def test_pearson_corrcoef_half_cpu(self, preds, target): + self.run_precision_test_cpu(preds, target, PearsonCorrcoef, pearson_corrcoef) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_pearson_corrcoef_half_gpu(self, preds, target): + self.run_precision_test_gpu(preds, target, 
PearsonCorrcoef, pearson_corrcoef) + + +def test_error_on_different_shape(): + metric = PearsonCorrcoef() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) + + with pytest.raises(ValueError, match="Expected both predictions and target to be 1 dimensional tensors."): + metric(B.randn(100, 2), B.randn(100, 2)) diff --git a/RE/paddlemetric/src/tests/regression/test_r2.py b/RE/paddlemetric/src/tests/regression/test_r2.py new file mode 100644 index 00000000..ebed636a --- /dev/null +++ b/RE/paddlemetric/src/tests/regression/test_r2.py @@ -0,0 +1,164 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple +from functools import partial + +import pytest +import paddleext.torchapi as B +from sklearn.metrics import r2_score as sk_r2score + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional import r2_score +from paddlemetrics.regression import R2Score +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_6 + +seed_all(42) + +num_targets = 5 + +Input = namedtuple("Input", ["preds", "target"]) + +_single_target_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.rand(NUM_BATCHES, BATCH_SIZE), +) + +_multi_target_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), + target=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), +) + + +def _single_target_sk_metric(preds, target, adjusted, multioutput): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + r2_score = sk_r2score(sk_target, sk_preds, multioutput=multioutput) + if adjusted != 0: + r2_score = 1 - (1 - r2_score) * (sk_preds.shape[0] - 1) / (sk_preds.shape[0] - adjusted - 1) + return r2_score + + +def _multi_target_sk_metric(preds, target, adjusted, multioutput): + sk_preds = preds.view(-1, num_targets).numpy() + sk_target = target.view(-1, num_targets).numpy() + r2_score = sk_r2score(sk_target, sk_preds, multioutput=multioutput) + if adjusted != 0: + r2_score = 1 - (1 - r2_score) * (sk_preds.shape[0] - 1) / (sk_preds.shape[0] - adjusted - 1) + return r2_score + + +@pytest.mark.parametrize("adjusted", [0, 5, 10]) +@pytest.mark.parametrize("multioutput", ["raw_values", "uniform_average", "variance_weighted"]) +@pytest.mark.parametrize( + "preds, target, sk_metric, num_outputs", + [ + (_single_target_inputs.preds, _single_target_inputs.target, _single_target_sk_metric, 1), + (_multi_target_inputs.preds, _multi_target_inputs.target, _multi_target_sk_metric, num_targets), + ], +) +class TestR2Score(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_r2(self, adjusted, multioutput, preds, target, sk_metric, num_outputs, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + R2Score, + partial(sk_metric, adjusted=adjusted, 
multioutput=multioutput), + dist_sync_on_step, + metric_args=dict(adjusted=adjusted, multioutput=multioutput, num_outputs=num_outputs), + ) + + def test_r2_functional(self, adjusted, multioutput, preds, target, sk_metric, num_outputs): + # todo: `num_outputs` is unused + self.run_functional_metric_test( + preds, + target, + r2_score, + partial(sk_metric, adjusted=adjusted, multioutput=multioutput), + metric_args=dict(adjusted=adjusted, multioutput=multioutput), + ) + + def test_r2_differentiability(self, adjusted, multioutput, preds, target, sk_metric, num_outputs): + self.run_differentiability_test( + preds=preds, + target=target, + metric_module=partial(R2Score, num_outputs=num_outputs), + metric_functional=r2_score, + metric_args=dict(adjusted=adjusted, multioutput=multioutput), + ) + + @pytest.mark.skipif( + not _TORCH_GREATER_EQUAL_1_6, reason="half support of core operations on not support before pytorch v1.6" + ) + def test_r2_half_cpu(self, adjusted, multioutput, preds, target, sk_metric, num_outputs): + self.run_precision_test_cpu( + preds, + target, + partial(R2Score, num_outputs=num_outputs), + r2_score, + {"adjusted": adjusted, "multioutput": multioutput}, + ) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_r2_half_gpu(self, adjusted, multioutput, preds, target, sk_metric, num_outputs): + self.run_precision_test_gpu( + preds, + target, + partial(R2Score, num_outputs=num_outputs), + r2_score, + {"adjusted": adjusted, "multioutput": multioutput}, + ) + + +def test_error_on_different_shape(metric_class=R2Score): + metric = metric_class() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) + + +def test_error_on_multidim_tensors(metric_class=R2Score): + metric = metric_class() + with pytest.raises( + ValueError, + match=r"Expected both prediction and target to be 1D or 2D tensors," r" but received tensors with dimension .", + ): + metric(B.randn(10, 20, 5), B.randn(10, 20, 5)) + + +def test_error_on_too_few_samples(metric_class=R2Score): + metric = metric_class() + with pytest.raises(ValueError, match="Needs at least two samples to calculate r2 score."): + metric(B.randn(1), B.randn(1)) + metric.reset() + + # calling update twice should still work + metric.update(B.randn(1), B.randn(1)) + metric.update(B.randn(1), B.randn(1)) + assert metric.compute() + + +def test_warning_on_too_large_adjusted(metric_class=R2Score): + metric = metric_class(adjusted=10) + + with pytest.warns( + UserWarning, + match="More independent regressions than data points in" " adjusted r2 score. Falls back to standard r2 score.", + ): + metric(B.randn(10), B.randn(10)) + + with pytest.warns(UserWarning, match="Division by zero in adjusted r2 score. Falls back to" " standard r2 score."): + metric(B.randn(11), B.randn(11)) diff --git a/RE/paddlemetric/src/tests/regression/test_spearman.py b/RE/paddlemetric/src/tests/regression/test_spearman.py new file mode 100644 index 00000000..a3764fde --- /dev/null +++ b/RE/paddlemetric/src/tests/regression/test_spearman.py @@ -0,0 +1,115 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple + +import pytest +import paddleext.torchapi as B +from scipy.stats import rankdata, spearmanr + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional.regression.spearman import _rank_data, spearman_corrcoef +from paddlemetrics.regression.spearman import SpearmanCorrcoef + +seed_all(42) + +Input = namedtuple("Input", ["preds", "target"]) + +_single_target_inputs1 = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.rand(NUM_BATCHES, BATCH_SIZE), +) + +_single_target_inputs2 = Input( + preds=B.randn(NUM_BATCHES, BATCH_SIZE), + target=B.randn(NUM_BATCHES, BATCH_SIZE), +) + +_specific_input = Input( + preds=B.stack([B.tensor([1.0, 0.0, 4.0, 1.0, 0.0, 3.0, 0.0]) for _ in range(NUM_BATCHES)]), + target=B.stack([B.tensor([4.0, 0.0, 3.0, 3.0, 3.0, 1.0, 1.0]) for _ in range(NUM_BATCHES)]), +) + + +@pytest.mark.parametrize( + "preds, target", + [ + (_single_target_inputs1.preds, _single_target_inputs1.target), + (_single_target_inputs2.preds, _single_target_inputs2.target), + (_specific_input.preds, _specific_input.target), + ], +) +def test_ranking(preds, target): + """test that ranking function works as expected.""" + for p, t in zip(preds, target): + scipy_ranking = [rankdata(p.numpy()), rankdata(t.numpy())] + tm_ranking = [_rank_data(p), _rank_data(t)] + assert (B.tensor(scipy_ranking[0]) == tm_ranking[0]).all() + assert (B.tensor(scipy_ranking[1]) == tm_ranking[1]).all() + + +def _sk_metric(preds, target): + sk_preds = preds.view(-1).numpy() + sk_target = target.view(-1).numpy() + return spearmanr(sk_target, sk_preds)[0] + + +@pytest.mark.parametrize( + "preds, target", + [ + (_single_target_inputs1.preds, _single_target_inputs1.target), + (_single_target_inputs2.preds, _single_target_inputs2.target), + (_specific_input.preds, _specific_input.target), + ], +) +class TestSpearmanCorrcoef(MetricTester): + atol = 1e-2 + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_spearman_corrcoef(self, preds, target, ddp, dist_sync_on_step): + self.run_class_metric_test( + ddp, + preds, + target, + SpearmanCorrcoef, + _sk_metric, + dist_sync_on_step, + ) + + def test_spearman_corrcoef_functional(self, preds, target): + self.run_functional_metric_test(preds, target, spearman_corrcoef, _sk_metric) + + def test_spearman_corrcoef_differentiability(self, preds, target): + self.run_differentiability_test( + preds=preds, target=target, metric_module=SpearmanCorrcoef, metric_functional=spearman_corrcoef + ) + + # Spearman half + cpu does not work due to missing support in B.arange + @pytest.mark.xfail(reason="Spearman metric does not support cpu + half precision") + def test_spearman_corrcoef_half_cpu(self, preds, target): + self.run_precision_test_cpu(preds, target, SpearmanCorrcoef, spearman_corrcoef) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_spearman_corrcoef_half_gpu(self, preds, target): + self.run_precision_test_gpu(preds, target, SpearmanCorrcoef, 
spearman_corrcoef) + + +def test_error_on_different_shape(): + metric = SpearmanCorrcoef() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) + + with pytest.raises(ValueError, match="Expected both predictions and target to be 1 dimensional tensors."): + metric(B.randn(100, 2), B.randn(100, 2)) diff --git a/RE/paddlemetric/src/tests/regression/test_tweedie_deviance.py b/RE/paddlemetric/src/tests/regression/test_tweedie_deviance.py new file mode 100644 index 00000000..af130313 --- /dev/null +++ b/RE/paddlemetric/src/tests/regression/test_tweedie_deviance.py @@ -0,0 +1,140 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple +from functools import partial + +import pytest +import paddleext.torchapi as B +from sklearn.metrics import mean_tweedie_deviance +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, MetricTester +from paddlemetrics.functional.regression.tweedie_deviance import tweedie_deviance_score +from paddlemetrics.regression.tweedie_deviance import TweedieDevianceScore + +seed_all(42) + +Input = namedtuple("Input", ["preds", "targets"]) + +_single_target_inputs1 = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + targets=B.rand(NUM_BATCHES, BATCH_SIZE), +) + +_single_target_inputs2 = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + targets=B.rand(NUM_BATCHES, BATCH_SIZE), +) + +_multi_target_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, 5), + targets=B.rand(NUM_BATCHES, BATCH_SIZE, 5), +) + + +def _sk_deviance(preds: Tensor, targets: Tensor, power: float): + sk_preds = preds.view(-1).numpy() + sk_target = targets.view(-1).numpy() + return mean_tweedie_deviance(sk_target, sk_preds, power=power) + + +@pytest.mark.parametrize("power", [-0.5, 0, 1, 1.5, 2, 3]) +@pytest.mark.parametrize( + "preds, targets", + [ + (_single_target_inputs1.preds, _single_target_inputs1.targets), + (_single_target_inputs2.preds, _single_target_inputs2.targets), + (_multi_target_inputs.preds, _multi_target_inputs.targets), + ], +) +class TestDevianceScore(MetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_deviance_scores_class(self, ddp, dist_sync_on_step, preds, targets, power): + self.run_class_metric_test( + ddp, + preds, + targets, + TweedieDevianceScore, + partial(_sk_deviance, power=power), + dist_sync_on_step, + metric_args=dict(power=power), + ) + + def test_deviance_scores_functional(self, preds, targets, power): + self.run_functional_metric_test( + preds, + targets, + tweedie_deviance_score, + partial(_sk_deviance, power=power), + metric_args=dict(power=power), + ) + + def test_pearson_corrcoef_differentiability(self, preds, targets, power): + self.run_differentiability_test( + preds, targets, metric_module=TweedieDevianceScore, 
metric_functional=tweedie_deviance_score + ) + + # Tweedie Deviance Score half + cpu does not work due to missing support in B.log + @pytest.mark.xfail(reason="TweedieDevianceScore metric does not support cpu + half precision") + def test_pearson_corrcoef_half_cpu(self, preds, targets, power): + metric_args = {"power": power} + self.run_precision_test_cpu( + preds, + targets, + metric_module=TweedieDevianceScore, + metric_functional=tweedie_deviance_score, + metric_args=metric_args, + ) + + @pytest.mark.skipif(not B.cuda.is_available(), reason="test requires cuda") + def test_pearson_corrcoef_half_gpu(self, preds, targets, power): + metric_args = {"power": power} + self.run_precision_test_gpu( + preds, + targets, + metric_module=TweedieDevianceScore, + metric_functional=tweedie_deviance_score, + metric_args=metric_args, + ) + + +def test_error_on_different_shape(metric_class=TweedieDevianceScore): + metric = metric_class() + with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): + metric(B.randn(100), B.randn(50)) + + +def test_error_on_invalid_inputs(metric_class=TweedieDevianceScore): + with pytest.raises(ValueError, match="Deviance Score is not defined for power=0.5."): + metric_class(power=0.5) + + metric = metric_class(power=1) + with pytest.raises( + ValueError, match="For power=1, 'preds' has to be strictly positive and 'targets' cannot be negative." + ): + metric(B.tensor([-1.0, 2.0, 3.0]), B.rand(3)) + + with pytest.raises( + ValueError, match="For power=1, 'preds' has to be strictly positive and 'targets' cannot be negative." + ): + metric(B.rand(3), B.tensor([-1.0, 2.0, 3.0])) + + metric = metric_class(power=2) + with pytest.raises(ValueError, match="For power=2, both 'preds' and 'targets' have to be strictly positive."): + metric(B.tensor([-1.0, 2.0, 3.0]), B.rand(3)) + + with pytest.raises(ValueError, match="For power=2, both 'preds' and 'targets' have to be strictly positive."): + metric(B.rand(3), B.tensor([-1.0, 2.0, 3.0])) diff --git a/RE/paddlemetric/src/tests/retrieval/__init__.py b/RE/paddlemetric/src/tests/retrieval/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/retrieval/helpers.py b/RE/paddlemetric/src/tests/retrieval/helpers.py new file mode 100644 index 00000000..419fe19b --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/helpers.py @@ -0,0 +1,511 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
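+# --- Editorial sketch (not part of the original suite) -----------------------
+# The Tweedie deviance tests above delegate to sklearn's mean_tweedie_deviance,
+# where `power` selects the distribution family (0 behaves like squared error,
+# 1 like Poisson, 2 like Gamma); powers in (0, 1) are undefined, which is what
+# the power=0.5 error test asserts. A hypothetical, self-contained illustration:
+def _example_tweedie_reference():
+    import numpy as np
+    from sklearn.metrics import mean_tweedie_deviance
+    preds = np.array([0.5, 1.0, 2.0])
+    target = np.array([1.0, 1.0, 2.0])
+    # power=1 needs preds > 0 and target >= 0; power=2 needs both strictly positive
+    return {p: mean_tweedie_deviance(target, preds, power=p) for p in (0, 1, 2)}
+# -----------------------------------------------------------------------------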
+from functools import partial +from typing import Callable, Dict, List, Tuple, Type, Union + +import numpy as np +import pytest +import paddleext.torchapi as B +from numpy import array +from paddleext.torchapi import Tensor, tensor + +from tests.helpers import seed_all +from tests.helpers.testers import Metric, MetricTester +from tests.retrieval.inputs import _input_retrieval_scores as _irs +from tests.retrieval.inputs import _input_retrieval_scores_all_target as _irs_all +from tests.retrieval.inputs import _input_retrieval_scores_empty as _irs_empty +from tests.retrieval.inputs import _input_retrieval_scores_extra as _irs_extra +from tests.retrieval.inputs import _input_retrieval_scores_float_target as _irs_float_tgt +from tests.retrieval.inputs import _input_retrieval_scores_int_target as _irs_int_tgt +from tests.retrieval.inputs import _input_retrieval_scores_mismatching_sizes as _irs_mis_sz +from tests.retrieval.inputs import _input_retrieval_scores_mismatching_sizes_func as _irs_mis_sz_fn +from tests.retrieval.inputs import _input_retrieval_scores_no_target as _irs_no_tgt +from tests.retrieval.inputs import _input_retrieval_scores_wrong_targets as _irs_bad_tgt + +seed_all(42) + +# a version of get_group_indexes that depends on NumPy is here to avoid this dependency for the full library + + +def get_group_indexes(indexes: Union[Tensor, np.ndarray]) -> List[Union[Tensor, np.ndarray]]: + """Given an integer `B.Tensor` or `np.ndarray` `indexes`, return a `B.Tensor` or `np.ndarray` of + indexes for each different value in `indexes`. + + Args: + indexes: a `B.Tensor` or `np.ndarray` of integers + + Return: + A list of integer `B.Tensor`s or `np.ndarray`s + + Example: + >>> indexes = B.tensor([0, 0, 0, 1, 1, 1, 1]) + >>> get_group_indexes(indexes) + [tensor([0, 1, 2]), tensor([3, 4, 5, 6])] + """ + structure, dtype = (tensor, B.long) if isinstance(indexes, Tensor) else (np.array, np.int64) + + res = {} + for i, _id in enumerate(indexes): + _id = _id.item() + if _id in res: + res[_id] += [i] + else: + res[_id] = [i] + + return [structure(x, dtype=dtype) for x in res.values()] + + +def _compute_sklearn_metric( + preds: Union[Tensor, array], + target: Union[Tensor, array], + indexes: np.ndarray = None, + metric: Callable = None, + empty_target_action: str = "skip", + reverse: bool = False, + **kwargs, +) -> Tensor: + """Compute metric with multiple iterations over every query predictions set.""" + + if indexes is None: + indexes = np.full_like(preds, fill_value=0, dtype=np.int64) + if isinstance(indexes, Tensor): + indexes = indexes.cpu().numpy() + if isinstance(preds, Tensor): + preds = preds.cpu().numpy() + if isinstance(target, Tensor): + target = target.cpu().numpy() + + assert isinstance(indexes, np.ndarray) + assert isinstance(preds, np.ndarray) + assert isinstance(target, np.ndarray) + + indexes = indexes.flatten() + preds = preds.flatten() + target = target.flatten() + groups = get_group_indexes(indexes) + + sk_results = [] + for group in groups: + trg, pds = target[group], preds[group] + + if ((1 - trg) if reverse else trg).sum() == 0: + if empty_target_action == "skip": + pass + elif empty_target_action == "pos": + sk_results.append(1.0) + else: + sk_results.append(0.0) + else: + res = metric(trg, pds, **kwargs) + sk_results.append(res) + + if len(sk_results) > 0: + return np.mean(sk_results) + return np.array(0.0) + + +def _concat_tests(*tests: Tuple[Dict]) -> Dict: + """Concat tests composed by a string and a list of arguments.""" + assert len(tests), "`_concat_tests` expects at 
least an argument" + assert all(tests[0]["argnames"] == x["argnames"] for x in tests[1:]), "the header must be the same for all tests" + return dict(argnames=tests[0]["argnames"], argvalues=sum((x["argvalues"] for x in tests), [])) + + +_errors_test_functional_metric_parameters_default = dict( + argnames="preds,target,message,metric_args", + argvalues=[ + # check input shapes are consistent (func) + (_irs_mis_sz_fn.preds, _irs_mis_sz_fn.target, "`preds` and `target` must be of the same shape", {}), + # check input tensors are not empty + (_irs_empty.preds, _irs_empty.target, "`preds` and `target` must be non-empty and non-scalar tensors", {}), + # check on input dtypes + (_irs.preds.bool(), _irs.target, "`preds` must be a tensor of floats", {}), + # check targets are between 0 and 1 + (_irs_bad_tgt.preds, _irs_bad_tgt.target, "`target` must contain `binary` values", {}), + ], +) + +_errors_test_functional_metric_parameters_with_nonbinary = dict( + argnames="preds,target,message,metric_args", + argvalues=[ + # check input shapes are consistent (func) + (_irs_mis_sz_fn.preds, _irs_mis_sz_fn.target, "`preds` and `target` must be of the same shape", {}), + # check input tensors are not empty + (_irs_empty.preds, _irs_empty.target, "`preds` and `target` must be non-empty and non-scalar tensors", {}), + # check on input dtypes + (_irs.preds.bool(), _irs.target, "`preds` must be a tensor of floats", {}), + ], +) + +_errors_test_functional_metric_parameters_k = dict( + argnames="preds,target,message,metric_args", + argvalues=[ + (_irs.preds, _irs.target, "`k` has to be a positive integer or None", dict(k=-10)), + (_irs.preds, _irs.target, "`k` has to be a positive integer or None", dict(k=4.0)), + ], +) + +_errors_test_class_metric_parameters_no_pos_target = dict( + argnames="indexes,preds,target,message,metric_args", + argvalues=[ + # check when error when there are no positive targets + ( + _irs_no_tgt.indexes, + _irs_no_tgt.preds, + _irs_no_tgt.target, + "`compute` method was provided with a query with no positive target.", + dict(empty_target_action="error"), + ), + ], +) + +_errors_test_class_metric_parameters_no_neg_target = dict( + argnames="indexes,preds,target,message,metric_args", + argvalues=[ + # check when error when there are no negative targets + ( + _irs_all.indexes, + _irs_all.preds, + _irs_all.target, + "`compute` method was provided with a query with no negative target.", + dict(empty_target_action="error"), + ), + ], +) + +_errors_test_class_metric_parameters_with_nonbinary = dict( + argnames="indexes,preds,target,message,metric_args", + argvalues=[ + (None, _irs.preds, _irs.target, "`indexes` cannot be None", dict(empty_target_action="error")), + # check when input arguments are invalid + ( + _irs.indexes, + _irs.preds, + _irs.target, + "`empty_target_action` received a wrong value `casual_argument`.", + dict(empty_target_action="casual_argument"), + ), + # check input shapes are consistent + ( + _irs_mis_sz.indexes, + _irs_mis_sz.preds, + _irs_mis_sz.target, + "`indexes`, `preds` and `target` must be of the same shape", + dict(empty_target_action="skip"), + ), + # check input tensors are not empty + ( + _irs_empty.indexes, + _irs_empty.preds, + _irs_empty.target, + "`indexes`, `preds` and `target` must be non-empty and non-scalar tensors", + dict(empty_target_action="skip"), + ), + # check on input dtypes + ( + _irs.indexes.bool(), + _irs.preds, + _irs.target, + "`indexes` must be a tensor of long integers", + dict(empty_target_action="skip"), + ), + ( + _irs.indexes, + 
_irs.preds.bool(), + _irs.target, + "`preds` must be a tensor of floats", + dict(empty_target_action="skip"), + ), + ], +) + +_errors_test_class_metric_parameters_default = dict( + argnames="indexes,preds,target,message,metric_args", + argvalues=[ + (None, _irs.preds, _irs.target, "`indexes` cannot be None", dict(empty_target_action="error")), + # check when input arguments are invalid + ( + _irs.indexes, + _irs.preds, + _irs.target, + "`empty_target_action` received a wrong value `casual_argument`.", + dict(empty_target_action="casual_argument"), + ), + # check input shapes are consistent + ( + _irs_mis_sz.indexes, + _irs_mis_sz.preds, + _irs_mis_sz.target, + "`indexes`, `preds` and `target` must be of the same shape", + dict(empty_target_action="skip"), + ), + # check input tensors are not empty + ( + _irs_empty.indexes, + _irs_empty.preds, + _irs_empty.target, + "`indexes`, `preds` and `target` must be non-empty and non-scalar tensors", + dict(empty_target_action="skip"), + ), + # check on input dtypes + ( + _irs.indexes.bool(), + _irs.preds, + _irs.target, + "`indexes` must be a tensor of long integers", + dict(empty_target_action="skip"), + ), + ( + _irs.indexes, + _irs.preds.bool(), + _irs.target, + "`preds` must be a tensor of floats", + dict(empty_target_action="skip"), + ), + ], +) + +_errors_test_class_metric_parameters_k = dict( + argnames="indexes,preds,target,message,metric_args", + argvalues=[ + (_irs.index, _irs.preds, _irs.target, "`k` has to be a positive integer or None", dict(k=-10)), + ], +) + +_default_metric_class_input_arguments = dict( + argnames="indexes,preds,target", + argvalues=[ + (_irs.indexes, _irs.preds, _irs.target), + (_irs_extra.indexes, _irs_extra.preds, _irs_extra.target), + (_irs_no_tgt.indexes, _irs_no_tgt.preds, _irs_no_tgt.target), + ], +) + +_default_metric_class_input_arguments_with_non_binary_target = dict( + argnames="indexes,preds,target", + argvalues=[ + (_irs.indexes, _irs.preds, _irs.target), + (_irs_extra.indexes, _irs_extra.preds, _irs_extra.target), + (_irs_no_tgt.indexes, _irs_no_tgt.preds, _irs_no_tgt.target), + (_irs_int_tgt.indexes, _irs_int_tgt.preds, _irs_int_tgt.target), + (_irs_float_tgt.indexes, _irs_float_tgt.preds, _irs_float_tgt.target), + ], +) + +_default_metric_functional_input_arguments = dict( + argnames="preds,target", + argvalues=[ + (_irs.preds, _irs.target), + (_irs_extra.preds, _irs_extra.target), + (_irs_no_tgt.preds, _irs_no_tgt.target), + ], +) + +_default_metric_functional_input_arguments_with_non_binary_target = dict( + argnames="preds,target", + argvalues=[ + (_irs.preds, _irs.target), + (_irs_extra.preds, _irs_extra.target), + (_irs_no_tgt.preds, _irs_no_tgt.target), + (_irs_int_tgt.preds, _irs_int_tgt.target), + (_irs_float_tgt.preds, _irs_float_tgt.target), + ], +) + + +def _errors_test_class_metric( + indexes: Tensor, + preds: Tensor, + target: Tensor, + metric_class: Metric, + message: str = "", + metric_args: dict = None, + exception_type: Type[Exception] = ValueError, + kwargs_update: dict = None, +): + """Utility function doing checks about types, parameters and errors. 
+ + Args: + indexes: torch tensor with indexes + preds: torch tensor with predictions + target: torch tensor with targets + metric_class: lightning metric class that should be tested + message: message that exception should return + metric_args: arguments for class initialization + exception_type: callable function that is used for comparison + kwargs_update: Additional keyword arguments that will be passed with indexes, preds and + target when running update on the metric. + """ + metric_args = metric_args or {} + kwargs_update = kwargs_update or {} + with pytest.raises(exception_type, match=message): + metric = metric_class(**metric_args) + metric(preds, target, indexes=indexes, **kwargs_update) + + +def _errors_test_functional_metric( + preds: Tensor, + target: Tensor, + metric_functional: Metric, + message: str = "", + exception_type: Type[Exception] = ValueError, + kwargs_update: dict = None, +): + """Utility function doing checks about types, parameters and errors. + + Args: + preds: torch tensor with predictions + target: torch tensor with targets + metric_functional: lightning functional metric that should be tested + message: message that exception should return + exception_type: callable function that is used for comparison + kwargs_update: Additional keyword arguments that will be passed with indexes, preds and + target when running update on the metric. + """ + kwargs_update = kwargs_update or {} + with pytest.raises(exception_type, match=message): + metric_functional(preds, target, **kwargs_update) + + +class RetrievalMetricTester(MetricTester): + def run_class_metric_test( + self, + ddp: bool, + indexes: Tensor, + preds: Tensor, + target: Tensor, + metric_class: Metric, + sk_metric: Callable, + dist_sync_on_step: bool, + metric_args: dict, + reverse: bool = False, + ): + _sk_metric_adapted = partial(_compute_sklearn_metric, metric=sk_metric, reverse=reverse, **metric_args) + + super().run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=metric_class, + sk_metric=_sk_metric_adapted, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + fragment_kwargs=True, + indexes=indexes, # every additional argument will be passed to metric_class and _sk_metric_adapted + ) + + def run_functional_metric_test( + self, + preds: Tensor, + target: Tensor, + metric_functional: Callable, + sk_metric: Callable, + metric_args: dict, + reverse: bool = False, + **kwargs, + ): + _sk_metric_adapted = partial(_compute_sklearn_metric, metric=sk_metric, reverse=reverse, **metric_args) + + super().run_functional_metric_test( + preds=preds, + target=target, + metric_functional=metric_functional, + sk_metric=_sk_metric_adapted, + metric_args=metric_args, + fragment_kwargs=True, + **kwargs, + ) + + def run_precision_test_cpu( + self, + indexes: Tensor, + preds: Tensor, + target: Tensor, + metric_module: Metric, + metric_functional: Callable, + ): + def metric_functional_ignore_indexes(preds, target, indexes): + return metric_functional(preds, target) + + super().run_precision_test_cpu( + preds=preds, + target=target, + metric_module=metric_module, + metric_functional=metric_functional_ignore_indexes, + metric_args={"empty_target_action": "neg"}, + indexes=indexes, # every additional argument will be passed to RetrievalMAP and _sk_metric_adapted + ) + + def run_precision_test_gpu( + self, + indexes: Tensor, + preds: Tensor, + target: Tensor, + metric_module: Metric, + metric_functional: Callable, + ): + if not B.cuda.is_available(): + pytest.skip() + + def 
metric_functional_ignore_indexes(preds, target, indexes): + return metric_functional(preds, target) + + super().run_precision_test_gpu( + preds=preds, + target=target, + metric_module=metric_module, + metric_functional=metric_functional_ignore_indexes, + metric_args={"empty_target_action": "neg"}, + indexes=indexes, # every additional argument will be passed to RetrievalMAP and _sk_metric_adapted + ) + + @staticmethod + def run_metric_class_arguments_test( + indexes: Tensor, + preds: Tensor, + target: Tensor, + metric_class: Metric, + message: str = "", + metric_args: dict = None, + exception_type: Type[Exception] = ValueError, + kwargs_update: dict = None, + ): + _errors_test_class_metric( + indexes=indexes, + preds=preds, + target=target, + metric_class=metric_class, + message=message, + metric_args=metric_args, + exception_type=exception_type, + **kwargs_update, + ) + + @staticmethod + def run_functional_metric_arguments_test( + preds: Tensor, + target: Tensor, + metric_functional: Callable, + message: str = "", + exception_type: Type[Exception] = ValueError, + kwargs_update: dict = None, + ): + _errors_test_functional_metric( + preds=preds, + target=target, + metric_functional=metric_functional, + message=message, + exception_type=exception_type, + kwargs_update=kwargs_update, + ) diff --git a/RE/paddlemetric/src/tests/retrieval/inputs.py b/RE/paddlemetric/src/tests/retrieval/inputs.py new file mode 100644 index 00000000..d1e40b81 --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/inputs.py @@ -0,0 +1,82 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
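+# --- Editorial sketch (not part of the original suite) -----------------------
+# The retrieval helpers above evaluate a metric per query: samples are grouped
+# by their `indexes` value, the sklearn metric is computed on each group, and
+# the per-group scores are averaged (mirroring get_group_indexes followed by
+# _compute_sklearn_metric). A hypothetical numpy-only illustration:
+def _example_per_query_average():
+    import numpy as np
+    from sklearn.metrics import average_precision_score
+    indexes = np.array([0, 0, 0, 1, 1, 1, 1])
+    preds = np.array([0.9, 0.2, 0.4, 0.1, 0.8, 0.7, 0.3])
+    target = np.array([1, 0, 0, 0, 1, 1, 0])
+    scores = [
+        average_precision_score(target[indexes == q], preds[indexes == q])
+        for q in np.unique(indexes)    # one score per query, then the mean
+    ]
+    return float(np.mean(scores))
+# -----------------------------------------------------------------------------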
+from collections import namedtuple + +import paddleext.torchapi as B + +from tests.helpers.testers import BATCH_SIZE, EXTRA_DIM, NUM_BATCHES + +Input = namedtuple("InputMultiple", ["indexes", "preds", "target"]) + +# correct +_input_retrieval_scores = Input( + indexes=B.randint(high=10, size=(NUM_BATCHES, BATCH_SIZE)), + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE)), +) + +_input_retrieval_scores_extra = Input( + indexes=B.randint(high=10, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM)), + preds=B.rand(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM)), +) + +_input_retrieval_scores_int_target = Input( + indexes=B.randint(high=10, size=(NUM_BATCHES, 2 * BATCH_SIZE)), + preds=B.rand(NUM_BATCHES, 2 * BATCH_SIZE), + target=B.randint(low=-1, high=4, size=(NUM_BATCHES, 2 * BATCH_SIZE)), +) + +_input_retrieval_scores_float_target = Input( + indexes=B.randint(high=10, size=(NUM_BATCHES, 2 * BATCH_SIZE)), + preds=B.rand(NUM_BATCHES, 2 * BATCH_SIZE), + target=B.rand(NUM_BATCHES, 2 * BATCH_SIZE), +) + +# with errors +_input_retrieval_scores_no_target = Input( + indexes=B.randint(high=10, size=(NUM_BATCHES, BATCH_SIZE)), + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.randint(high=1, size=(NUM_BATCHES, BATCH_SIZE)), +) + +_input_retrieval_scores_all_target = Input( + indexes=B.randint(high=10, size=(NUM_BATCHES, BATCH_SIZE)), + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.randint(low=1, high=2, size=(NUM_BATCHES, BATCH_SIZE)), +) + +_input_retrieval_scores_empty = Input( + indexes=B.randint(high=10, size=[0]), + preds=B.rand(0), + target=B.randint(high=2, size=[0]), +) + +_input_retrieval_scores_mismatching_sizes = Input( + indexes=B.randint(high=10, size=(NUM_BATCHES, BATCH_SIZE - 2)), + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE)), +) + +_input_retrieval_scores_mismatching_sizes_func = Input( + indexes=B.randint(high=10, size=(NUM_BATCHES, BATCH_SIZE)), + preds=B.rand(NUM_BATCHES, BATCH_SIZE - 2), + target=B.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE)), +) + +_input_retrieval_scores_wrong_targets = Input( + indexes=B.randint(high=10, size=(NUM_BATCHES, BATCH_SIZE)), + preds=B.rand(NUM_BATCHES, BATCH_SIZE), + target=B.randint(low=-(2 ** 31), high=2 ** 31, size=(NUM_BATCHES, BATCH_SIZE)), +) diff --git a/RE/paddlemetric/src/tests/retrieval/test_fallout.py b/RE/paddlemetric/src/tests/retrieval/test_fallout.py new file mode 100644 index 00000000..e69ddd59 --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/test_fallout.py @@ -0,0 +1,152 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
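+# --- Editorial note (assumption about the backend shim's randint semantics) ---
+# The "no target" and "all target" fixtures above appear to rely on a half-open
+# randint range: randint(high=1) can only draw 0 (a query with no positive
+# targets) and randint(low=1, high=2) can only draw 1 (no negative targets);
+# these feed the empty_target_action error tests. A hypothetical numpy sketch:
+def _example_degenerate_targets():
+    import numpy as np
+    no_pos = np.random.randint(0, 1, size=8)    # high is exclusive -> always 0
+    all_pos = np.random.randint(1, 2, size=8)   # always 1
+    return int(no_pos.sum()) == 0 and int(all_pos.sum()) == 8
+# -----------------------------------------------------------------------------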
+import numpy as np +import pytest +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.retrieval.helpers import ( + RetrievalMetricTester, + _concat_tests, + _default_metric_class_input_arguments, + _default_metric_functional_input_arguments, + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_k, + _errors_test_class_metric_parameters_no_neg_target, + _errors_test_functional_metric_parameters_default, + _errors_test_functional_metric_parameters_k, +) +from paddlemetrics.functional.retrieval.fall_out import retrieval_fall_out +from paddlemetrics.retrieval.retrieval_fallout import RetrievalFallOut + +seed_all(42) + + +def _fallout_at_k(target: np.ndarray, preds: np.ndarray, k: int = None): + """Didn't find a reliable implementation of Fall-out in Information Retrieval, so, reimplementing here. + + See Wikipedia for `Fall-out`_ for more information about the metric definition. + """ + assert target.shape == preds.shape + assert len(target.shape) == 1 # works only with single dimension inputs + + k = len(preds) if k is None else k + + target = 1 - target + if target.sum(): + order_indexes = np.argsort(preds, axis=0)[::-1] + relevant = np.sum(target[order_indexes][:k]) + return relevant * 1.0 / target.sum() + return np.NaN + + +class TestFallOut(RetrievalMetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("empty_target_action", ["skip", "neg", "pos"]) + @pytest.mark.parametrize("k", [None, 1, 4, 10]) + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_class_metric( + self, + ddp: bool, + indexes: Tensor, + preds: Tensor, + target: Tensor, + dist_sync_on_step: bool, + empty_target_action: str, + k: int, + ): + metric_args = {"empty_target_action": empty_target_action, "k": k} + + self.run_class_metric_test( + ddp=ddp, + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalFallOut, + sk_metric=_fallout_at_k, + dist_sync_on_step=dist_sync_on_step, + reverse=True, + metric_args=metric_args, + ) + + @pytest.mark.parametrize(**_default_metric_functional_input_arguments) + @pytest.mark.parametrize("k", [None, 1, 4, 10]) + def test_functional_metric(self, preds: Tensor, target: Tensor, k: int): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=retrieval_fall_out, + sk_metric=_fallout_at_k, + reverse=True, + metric_args={}, + k=k, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_cpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_cpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalFallOut, + metric_functional=retrieval_fall_out, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_gpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_gpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalFallOut, + metric_functional=retrieval_fall_out, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_neg_target, + _errors_test_class_metric_parameters_k, + ) + ) + def test_arguments_class_metric( + self, indexes: Tensor, preds: Tensor, target: Tensor, message: str, metric_args: dict + ): + self.run_metric_class_arguments_test( + indexes=indexes, + preds=preds, + 
target=target, + metric_class=RetrievalFallOut, + message=message, + metric_args=metric_args, + exception_type=ValueError, + kwargs_update={}, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_functional_metric_parameters_default, + _errors_test_functional_metric_parameters_k, + ) + ) + def test_arguments_functional_metric(self, preds: Tensor, target: Tensor, message: str, metric_args: dict): + self.run_functional_metric_arguments_test( + preds=preds, + target=target, + metric_functional=retrieval_fall_out, + message=message, + exception_type=ValueError, + kwargs_update=metric_args, + ) diff --git a/RE/paddlemetric/src/tests/retrieval/test_hit_rate.py b/RE/paddlemetric/src/tests/retrieval/test_hit_rate.py new file mode 100644 index 00000000..a9d90838 --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/test_hit_rate.py @@ -0,0 +1,147 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pytest +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.retrieval.helpers import ( + RetrievalMetricTester, + _concat_tests, + _default_metric_class_input_arguments, + _default_metric_functional_input_arguments, + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_k, + _errors_test_class_metric_parameters_no_pos_target, + _errors_test_functional_metric_parameters_default, + _errors_test_functional_metric_parameters_k, +) +from paddlemetrics.functional.retrieval.hit_rate import retrieval_hit_rate +from paddlemetrics.retrieval.retrieval_hit_rate import RetrievalHitRate + +seed_all(42) + + +def _hit_rate_at_k(target: np.ndarray, preds: np.ndarray, k: int = None): + """Didn't find a reliable implementation of Hit Rate in Information Retrieval, so, reimplementing here.""" + assert target.shape == preds.shape + assert len(target.shape) == 1 # works only with single dimension inputs + + if k is None: + k = len(preds) + + if target.sum() > 0: + order_indexes = np.argsort(preds, axis=0)[::-1] + relevant = np.sum(target[order_indexes][:k]) + return float(relevant > 0.0) + return np.NaN + + +class TestHitRate(RetrievalMetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("empty_target_action", ["skip", "neg", "pos"]) + @pytest.mark.parametrize("k", [None, 1, 4, 10]) + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_class_metric( + self, + ddp: bool, + indexes: Tensor, + preds: Tensor, + target: Tensor, + dist_sync_on_step: bool, + empty_target_action: str, + k: int, + ): + metric_args = {"empty_target_action": empty_target_action, "k": k} + + self.run_class_metric_test( + ddp=ddp, + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalHitRate, + sk_metric=_hit_rate_at_k, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + ) + + @pytest.mark.parametrize(**_default_metric_functional_input_arguments) + 
@pytest.mark.parametrize("k", [None, 1, 4, 10]) + def test_functional_metric(self, preds: Tensor, target: Tensor, k: int): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=retrieval_hit_rate, + sk_metric=_hit_rate_at_k, + metric_args={}, + k=k, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_cpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_cpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalHitRate, + metric_functional=retrieval_hit_rate, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_gpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_gpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalHitRate, + metric_functional=retrieval_hit_rate, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_pos_target, + _errors_test_class_metric_parameters_k, + ) + ) + def test_arguments_class_metric( + self, indexes: Tensor, preds: Tensor, target: Tensor, message: str, metric_args: dict + ): + self.run_metric_class_arguments_test( + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalHitRate, + message=message, + metric_args=metric_args, + exception_type=ValueError, + kwargs_update={}, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_functional_metric_parameters_default, + _errors_test_functional_metric_parameters_k, + ) + ) + def test_arguments_functional_metric(self, preds: Tensor, target: Tensor, message: str, metric_args: dict): + self.run_functional_metric_arguments_test( + preds=preds, + target=target, + metric_functional=retrieval_hit_rate, + message=message, + exception_type=ValueError, + kwargs_update=metric_args, + ) diff --git a/RE/paddlemetric/src/tests/retrieval/test_map.py b/RE/paddlemetric/src/tests/retrieval/test_map.py new file mode 100644 index 00000000..bef75b55 --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/test_map.py @@ -0,0 +1,120 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import pytest +from sklearn.metrics import average_precision_score as sk_average_precision_score +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.retrieval.helpers import ( + RetrievalMetricTester, + _concat_tests, + _default_metric_class_input_arguments, + _default_metric_functional_input_arguments, + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_pos_target, + _errors_test_functional_metric_parameters_default, +) +from paddlemetrics.functional.retrieval.average_precision import retrieval_average_precision +from paddlemetrics.retrieval.mean_average_precision import RetrievalMAP + +seed_all(42) + + +class TestMAP(RetrievalMetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("empty_target_action", ["skip", "neg", "pos"]) + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_class_metric( + self, + ddp: bool, + indexes: Tensor, + preds: Tensor, + target: Tensor, + dist_sync_on_step: bool, + empty_target_action: str, + ): + metric_args = {"empty_target_action": empty_target_action} + + self.run_class_metric_test( + ddp=ddp, + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalMAP, + sk_metric=sk_average_precision_score, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + ) + + @pytest.mark.parametrize(**_default_metric_functional_input_arguments) + def test_functional_metric(self, preds: Tensor, target: Tensor): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=retrieval_average_precision, + sk_metric=sk_average_precision_score, + metric_args={}, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_cpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_cpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalMAP, + metric_functional=retrieval_average_precision, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_gpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_gpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalMAP, + metric_functional=retrieval_average_precision, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_pos_target, + ) + ) + def test_arguments_class_metric( + self, indexes: Tensor, preds: Tensor, target: Tensor, message: str, metric_args: dict + ): + self.run_metric_class_arguments_test( + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalMAP, + message=message, + metric_args=metric_args, + exception_type=ValueError, + kwargs_update={}, + ) + + @pytest.mark.parametrize(**_errors_test_functional_metric_parameters_default) + def test_arguments_functional_metric(self, preds: Tensor, target: Tensor, message: str, metric_args: dict): + self.run_functional_metric_arguments_test( + preds=preds, + target=target, + metric_functional=retrieval_average_precision, + message=message, + exception_type=ValueError, + kwargs_update=metric_args, + ) diff --git a/RE/paddlemetric/src/tests/retrieval/test_mrr.py b/RE/paddlemetric/src/tests/retrieval/test_mrr.py new file mode 100644 index 00000000..d5a80af9 --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/test_mrr.py @@ -0,0 +1,142 @@ +# Copyright The 
PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pytest +from sklearn.metrics import label_ranking_average_precision_score +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.retrieval.helpers import ( + RetrievalMetricTester, + _concat_tests, + _default_metric_class_input_arguments, + _default_metric_functional_input_arguments, + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_pos_target, + _errors_test_functional_metric_parameters_default, +) +from paddlemetrics.functional.retrieval.reciprocal_rank import retrieval_reciprocal_rank +from paddlemetrics.retrieval.mean_reciprocal_rank import RetrievalMRR + +seed_all(42) + + +def _reciprocal_rank(target: np.ndarray, preds: np.ndarray): + """Adaptation of `sklearn.metrics.label_ranking_average_precision_score`. + + Since the original sklearn metric works as RR only when the number of positive targets is exactly 1, here we remove + every positive target that is not the most important. Remember that in RR only the positive target with the highest + score is considered. + """ + assert target.shape == preds.shape + assert len(target.shape) == 1 # works only with single dimension inputs + + # going to remove T targets that are not ranked as highest + indexes = preds[target.astype(bool)] + if len(indexes) > 0: + target[preds != indexes.max(-1, keepdims=True)[0]] = 0 # ensure that only 1 positive label is present + + if target.sum() > 0: + # sklearn `label_ranking_average_precision_score` requires at most 2 dims + return label_ranking_average_precision_score(np.expand_dims(target, axis=0), np.expand_dims(preds, axis=0)) + return 0.0 + + +class TestMRR(RetrievalMetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("empty_target_action", ["skip", "neg", "pos"]) + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_class_metric( + self, + ddp: bool, + indexes: Tensor, + preds: Tensor, + target: Tensor, + dist_sync_on_step: bool, + empty_target_action: str, + ): + metric_args = {"empty_target_action": empty_target_action} + + self.run_class_metric_test( + ddp=ddp, + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalMRR, + sk_metric=_reciprocal_rank, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + ) + + @pytest.mark.parametrize(**_default_metric_functional_input_arguments) + def test_functional_metric(self, preds: Tensor, target: Tensor): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=retrieval_reciprocal_rank, + sk_metric=_reciprocal_rank, + metric_args={}, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_cpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_cpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalMRR, + 
metric_functional=retrieval_reciprocal_rank, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_gpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_gpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalMRR, + metric_functional=retrieval_reciprocal_rank, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_pos_target, + ) + ) + def test_arguments_class_metric( + self, indexes: Tensor, preds: Tensor, target: Tensor, message: str, metric_args: dict + ): + self.run_metric_class_arguments_test( + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalMRR, + message=message, + metric_args=metric_args, + exception_type=ValueError, + kwargs_update={}, + ) + + @pytest.mark.parametrize(**_errors_test_functional_metric_parameters_default) + def test_arguments_functional_metric(self, preds: Tensor, target: Tensor, message: str, metric_args: dict): + self.run_functional_metric_arguments_test( + preds=preds, + target=target, + metric_functional=retrieval_reciprocal_rank, + message=message, + exception_type=ValueError, + kwargs_update=metric_args, + ) diff --git a/RE/paddlemetric/src/tests/retrieval/test_ndcg.py b/RE/paddlemetric/src/tests/retrieval/test_ndcg.py new file mode 100644 index 00000000..4fa099f1 --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/test_ndcg.py @@ -0,0 +1,151 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import numpy as np +import pytest +from sklearn.metrics import ndcg_score +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.retrieval.helpers import ( + RetrievalMetricTester, + _concat_tests, + _default_metric_class_input_arguments_with_non_binary_target, + _default_metric_functional_input_arguments_with_non_binary_target, + _errors_test_class_metric_parameters_k, + _errors_test_class_metric_parameters_with_nonbinary, + _errors_test_functional_metric_parameters_k, + _errors_test_functional_metric_parameters_with_nonbinary, +) +from paddlemetrics.functional.retrieval.ndcg import retrieval_normalized_dcg +from paddlemetrics.retrieval.retrieval_ndcg import RetrievalNormalizedDCG + +seed_all(42) + + +def _ndcg_at_k(target: np.ndarray, preds: np.ndarray, k: int = None): + """Adapting `from sklearn.metrics.ndcg_score`.""" + assert target.shape == preds.shape + assert len(target.shape) == 1 # works only with single dimension inputs + + if target.shape[0] < 2: # ranking is equal to ideal ranking with a single document + return np.array(1.0) + + preds = np.expand_dims(preds, axis=0) + target = np.expand_dims(target, axis=0) + + return ndcg_score(target, preds, k=k) + + +class TestNDCG(RetrievalMetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("empty_target_action", ["skip", "neg", "pos"]) + @pytest.mark.parametrize("k", [None, 1, 4, 10]) + @pytest.mark.parametrize(**_default_metric_class_input_arguments_with_non_binary_target) + def test_class_metric( + self, + ddp: bool, + indexes: Tensor, + preds: Tensor, + target: Tensor, + dist_sync_on_step: bool, + empty_target_action: str, + k: int, + ): + metric_args = {"empty_target_action": empty_target_action, "k": k} + + self.run_class_metric_test( + ddp=ddp, + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalNormalizedDCG, + sk_metric=_ndcg_at_k, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + ) + + @pytest.mark.parametrize(**_default_metric_functional_input_arguments_with_non_binary_target) + @pytest.mark.parametrize("k", [None, 1, 4, 10]) + def test_functional_metric(self, preds: Tensor, target: Tensor, k: int): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=retrieval_normalized_dcg, + sk_metric=_ndcg_at_k, + metric_args={}, + k=k, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments_with_non_binary_target) + def test_precision_cpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_cpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalNormalizedDCG, + metric_functional=retrieval_normalized_dcg, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments_with_non_binary_target) + def test_precision_gpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_gpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalNormalizedDCG, + metric_functional=retrieval_normalized_dcg, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_class_metric_parameters_with_nonbinary, + _errors_test_class_metric_parameters_k, + ) + ) + def test_arguments_class_metric( + self, indexes: Tensor, preds: Tensor, target: Tensor, message: str, metric_args: dict + ): + if target.is_floating_point(): + pytest.skip("NDCG metric works with float target input") + + self.run_metric_class_arguments_test( + 
indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalNormalizedDCG, + message=message, + metric_args=metric_args, + exception_type=ValueError, + kwargs_update={}, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_functional_metric_parameters_with_nonbinary, + _errors_test_functional_metric_parameters_k, + ) + ) + def test_arguments_functional_metric(self, preds: Tensor, target: Tensor, message: str, metric_args: dict): + if target.is_floating_point(): + pytest.skip("NDCG metric works with float target input") + + self.run_functional_metric_arguments_test( + preds=preds, + target=target, + metric_functional=retrieval_normalized_dcg, + message=message, + exception_type=ValueError, + kwargs_update=metric_args, + ) diff --git a/RE/paddlemetric/src/tests/retrieval/test_precision.py b/RE/paddlemetric/src/tests/retrieval/test_precision.py new file mode 100644 index 00000000..260e0242 --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/test_precision.py @@ -0,0 +1,151 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pytest +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.retrieval.helpers import ( + RetrievalMetricTester, + _concat_tests, + _default_metric_class_input_arguments, + _default_metric_functional_input_arguments, + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_k, + _errors_test_class_metric_parameters_no_pos_target, + _errors_test_functional_metric_parameters_default, + _errors_test_functional_metric_parameters_k, +) +from paddlemetrics.functional.retrieval.precision import retrieval_precision +from paddlemetrics.retrieval.retrieval_precision import RetrievalPrecision + +seed_all(42) + + +def _precision_at_k(target: np.ndarray, preds: np.ndarray, k: int = None): + """Didn't find a reliable implementation of Precision in Information Retrieval, so, reimplementing here. + + A good explanation can be found + `here _`. 
+ """ + assert target.shape == preds.shape + assert len(target.shape) == 1 # works only with single dimension inputs + + if k is None: + k = len(preds) + + if target.sum() > 0: + order_indexes = np.argsort(preds, axis=0)[::-1] + relevant = np.sum(target[order_indexes][:k]) + return relevant * 1.0 / k + return np.NaN + + +class TestPrecision(RetrievalMetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("empty_target_action", ["skip", "neg", "pos"]) + @pytest.mark.parametrize("k", [None, 1, 4, 10]) + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_class_metric( + self, + ddp: bool, + indexes: Tensor, + preds: Tensor, + target: Tensor, + dist_sync_on_step: bool, + empty_target_action: str, + k: int, + ): + metric_args = {"empty_target_action": empty_target_action, "k": k} + + self.run_class_metric_test( + ddp=ddp, + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalPrecision, + sk_metric=_precision_at_k, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + ) + + @pytest.mark.parametrize(**_default_metric_functional_input_arguments) + @pytest.mark.parametrize("k", [None, 1, 4, 10]) + def test_functional_metric(self, preds: Tensor, target: Tensor, k: int): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=retrieval_precision, + sk_metric=_precision_at_k, + metric_args={}, + k=k, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_cpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_cpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalPrecision, + metric_functional=retrieval_precision, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_gpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_gpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalPrecision, + metric_functional=retrieval_precision, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_pos_target, + _errors_test_class_metric_parameters_k, + ) + ) + def test_arguments_class_metric( + self, indexes: Tensor, preds: Tensor, target: Tensor, message: str, metric_args: dict + ): + self.run_metric_class_arguments_test( + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalPrecision, + message=message, + metric_args=metric_args, + exception_type=ValueError, + kwargs_update={}, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_functional_metric_parameters_default, + _errors_test_functional_metric_parameters_k, + ) + ) + def test_arguments_functional_metric(self, preds: Tensor, target: Tensor, message: str, metric_args: dict): + self.run_functional_metric_arguments_test( + preds=preds, + target=target, + metric_functional=retrieval_precision, + message=message, + exception_type=ValueError, + kwargs_update=metric_args, + ) diff --git a/RE/paddlemetric/src/tests/retrieval/test_r_precision.py b/RE/paddlemetric/src/tests/retrieval/test_r_precision.py new file mode 100644 index 00000000..e9787482 --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/test_r_precision.py @@ -0,0 +1,136 @@ +# Copyright The PyTorch Lightning team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pytest +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.retrieval.helpers import ( + RetrievalMetricTester, + _concat_tests, + _default_metric_class_input_arguments, + _default_metric_functional_input_arguments, + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_pos_target, + _errors_test_functional_metric_parameters_default, +) +from paddlemetrics.functional.retrieval.r_precision import retrieval_r_precision +from paddlemetrics.retrieval.retrieval_r_precision import RetrievalRPrecision + +seed_all(42) + + +def _r_precision(target: np.ndarray, preds: np.ndarray): + """Didn't find a reliable implementation of R-Precision in Information Retrieval, so, reimplementing here. + + A good explanation can be found + `here _`. + """ + assert target.shape == preds.shape + assert len(target.shape) == 1 # works only with single dimension inputs + + if target.sum() > 0: + order_indexes = np.argsort(preds, axis=0)[::-1] + relevant = np.sum(target[order_indexes][: target.sum()]) + return relevant * 1.0 / target.sum() + return np.NaN + + +class TestRPrecision(RetrievalMetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("empty_target_action", ["skip", "neg", "pos"]) + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_class_metric( + self, + ddp: bool, + indexes: Tensor, + preds: Tensor, + target: Tensor, + dist_sync_on_step: bool, + empty_target_action: str, + ): + metric_args = {"empty_target_action": empty_target_action} + + self.run_class_metric_test( + ddp=ddp, + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalRPrecision, + sk_metric=_r_precision, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + ) + + @pytest.mark.parametrize(**_default_metric_functional_input_arguments) + def test_functional_metric(self, preds: Tensor, target: Tensor): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=retrieval_r_precision, + sk_metric=_r_precision, + metric_args={}, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_cpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_cpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalRPrecision, + metric_functional=retrieval_r_precision, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_gpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_gpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalRPrecision, + metric_functional=retrieval_r_precision, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_pos_target, + ) + ) + def 
test_arguments_class_metric( + self, indexes: Tensor, preds: Tensor, target: Tensor, message: str, metric_args: dict + ): + self.run_metric_class_arguments_test( + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalRPrecision, + message=message, + metric_args=metric_args, + exception_type=ValueError, + kwargs_update={}, + ) + + @pytest.mark.parametrize(**_errors_test_functional_metric_parameters_default) + def test_arguments_functional_metric(self, preds: Tensor, target: Tensor, message: str, metric_args: dict): + self.run_functional_metric_arguments_test( + preds=preds, + target=target, + metric_functional=retrieval_r_precision, + message=message, + exception_type=ValueError, + kwargs_update=metric_args, + ) diff --git a/RE/paddlemetric/src/tests/retrieval/test_recall.py b/RE/paddlemetric/src/tests/retrieval/test_recall.py new file mode 100644 index 00000000..8f01120b --- /dev/null +++ b/RE/paddlemetric/src/tests/retrieval/test_recall.py @@ -0,0 +1,150 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pytest +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from tests.retrieval.helpers import ( + RetrievalMetricTester, + _concat_tests, + _default_metric_class_input_arguments, + _default_metric_functional_input_arguments, + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_k, + _errors_test_class_metric_parameters_no_pos_target, + _errors_test_functional_metric_parameters_default, + _errors_test_functional_metric_parameters_k, +) +from paddlemetrics.functional.retrieval.recall import retrieval_recall +from paddlemetrics.retrieval.retrieval_recall import RetrievalRecall + +seed_all(42) + + +def _recall_at_k(target: np.ndarray, preds: np.ndarray, k: int = None): + """Didn't find a reliable implementation of Recall in Information Retrieval, so, reimplementing here. + + See wikipedia for more information about definition. 
+ """ + assert target.shape == preds.shape + assert len(target.shape) == 1 # works only with single dimension inputs + + if k is None: + k = len(preds) + + if target.sum() > 0: + order_indexes = np.argsort(preds, axis=0)[::-1] + relevant = np.sum(target[order_indexes][:k]) + return relevant * 1.0 / target.sum() + return np.NaN + + +class TestRecall(RetrievalMetricTester): + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + @pytest.mark.parametrize("empty_target_action", ["skip", "neg", "pos"]) + @pytest.mark.parametrize("k", [None, 1, 4, 10]) + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_class_metric( + self, + ddp: bool, + indexes: Tensor, + preds: Tensor, + target: Tensor, + dist_sync_on_step: bool, + empty_target_action: str, + k: int, + ): + metric_args = {"empty_target_action": empty_target_action, "k": k} + + self.run_class_metric_test( + ddp=ddp, + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalRecall, + sk_metric=_recall_at_k, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + ) + + @pytest.mark.parametrize(**_default_metric_functional_input_arguments) + @pytest.mark.parametrize("k", [None, 1, 4, 10]) + def test_functional_metric(self, preds: Tensor, target: Tensor, k: int): + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=retrieval_recall, + sk_metric=_recall_at_k, + metric_args={}, + k=k, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_cpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_cpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalRecall, + metric_functional=retrieval_recall, + ) + + @pytest.mark.parametrize(**_default_metric_class_input_arguments) + def test_precision_gpu(self, indexes: Tensor, preds: Tensor, target: Tensor): + self.run_precision_test_gpu( + indexes=indexes, + preds=preds, + target=target, + metric_module=RetrievalRecall, + metric_functional=retrieval_recall, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_class_metric_parameters_default, + _errors_test_class_metric_parameters_no_pos_target, + _errors_test_class_metric_parameters_k, + ) + ) + def test_arguments_class_metric( + self, indexes: Tensor, preds: Tensor, target: Tensor, message: str, metric_args: dict + ): + self.run_metric_class_arguments_test( + indexes=indexes, + preds=preds, + target=target, + metric_class=RetrievalRecall, + message=message, + metric_args=metric_args, + exception_type=ValueError, + kwargs_update={}, + ) + + @pytest.mark.parametrize( + **_concat_tests( + _errors_test_functional_metric_parameters_default, + _errors_test_functional_metric_parameters_k, + ) + ) + def test_arguments_functional_metric(self, preds: Tensor, target: Tensor, message: str, metric_args: dict): + self.run_functional_metric_arguments_test( + preds=preds, + target=target, + metric_functional=retrieval_recall, + message=message, + exception_type=ValueError, + kwargs_update=metric_args, + ) diff --git a/RE/paddlemetric/src/tests/test_utilities.py b/RE/paddlemetric/src/tests/test_utilities.py new file mode 100644 index 00000000..0f7aacd0 --- /dev/null +++ b/RE/paddlemetric/src/tests/test_utilities.py @@ -0,0 +1,21 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddlemetrics.utilities import rank_zero_debug, rank_zero_info, rank_zero_warn + + +def test_prints(): + rank_zero_debug("DEBUG") + rank_zero_info("INFO") + rank_zero_warn("WARN") diff --git a/RE/paddlemetric/src/tests/text/__init__.py b/RE/paddlemetric/src/tests/text/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/text/helpers.py b/RE/paddlemetric/src/tests/text/helpers.py new file mode 100644 index 00000000..ee896504 --- /dev/null +++ b/RE/paddlemetric/src/tests/text/helpers.py @@ -0,0 +1,479 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pickle +import sys +from enum import Enum, unique +from functools import partial +from typing import Any, Callable, Sequence, Union + +import pytest +import paddleext.torchapi as B +from paddleext.torchapi import Tensor +from B.multiprocessing import set_start_method + +from tests.helpers.testers import MetricTester, _assert_allclose, _assert_requires_grad, _assert_tensor +from paddlemetrics import Metric + +try: + set_start_method("spawn") +except RuntimeError: + pass + + +@unique +class INPUT_ORDER(Enum): + PREDS_FIRST = 1 + TARGETS_FIRST = 2 + + +TEXT_METRIC_INPUT = Union[Sequence[str], Sequence[Sequence[str]], Sequence[Sequence[Sequence[str]]]] +NUM_BATCHES = 2 + + +def _class_test( + rank: int, + worldsize: int, + preds: TEXT_METRIC_INPUT, + targets: TEXT_METRIC_INPUT, + metric_class: Metric, + sk_metric: Callable, + dist_sync_on_step: bool, + metric_args: dict = None, + check_dist_sync_on_step: bool = True, + check_batch: bool = True, + atol: float = 1e-8, + device: str = "cpu", + fragment_kwargs: bool = False, + check_scriptable: bool = True, + input_order: INPUT_ORDER = INPUT_ORDER.PREDS_FIRST, + key: str = None, + **kwargs_update: Any, +): + """Utility function doing the actual comparison between lightning class metric and reference metric. 
+ + Args: + rank: rank of current process + worldsize: number of processes + preds: Sequence of predicted tokens or predicted sentences + targets: Sequence of target tokens or target sentences + metric_class: lightning metric class that should be tested + sk_metric: callable function that is used for comparison + dist_sync_on_step: bool, if true will synchronize metric state across + processes at each ``forward()`` + metric_args: dict with additional arguments used for class initialization + check_dist_sync_on_step: bool, if true will check if the metric is also correctly + calculated per batch per device (and not just at the end) + check_batch: bool, if true will check if the metric is also correctly + calculated across devices for each batch (and not just at the end) + device: determine which device to run on, either 'cuda' or 'cpu' + fragment_kwargs: whether tensors in kwargs should be divided as `preds` and `targets` among processes + input_order: Define the ordering for the preds and targets positional arguments. + key: The key passed onto the `_assert_allclose` to compare the respective metric from the Dict output against + the sk_metric. + kwargs_update: Additional keyword arguments that will be passed with preds and + targets when running update on the metric. + """ + if not metric_args: + metric_args = {} + + # Instanciate lightning metric + metric = metric_class( + compute_on_step=check_dist_sync_on_step or check_batch, dist_sync_on_step=dist_sync_on_step, **metric_args + ) + + # check that the metric is scriptable + if check_scriptable: + B.jit.script(metric) + + # move to device + metric = metric.to(device) + kwargs_update = {k: v.to(device) if isinstance(v, Tensor) else v for k, v in kwargs_update.items()} + + # verify metrics work after being loaded from pickled state + pickled_metric = pickle.dumps(metric) + metric = pickle.loads(pickled_metric) + + for i in range(rank, NUM_BATCHES, worldsize): + batch_kwargs_update = {k: v[i] if isinstance(v, Tensor) else v for k, v in kwargs_update.items()} + + if input_order == INPUT_ORDER.PREDS_FIRST: + batch_result = metric(preds[i], targets[i], **batch_kwargs_update) + elif input_order == INPUT_ORDER.TARGETS_FIRST: + batch_result = metric(targets[i], preds[i], **batch_kwargs_update) + + if metric.dist_sync_on_step and check_dist_sync_on_step and rank == 0: + # Concatenation of Sequence of strings + ddp_preds = type(preds)() + ddp_targets = type(targets)() + for r in range(worldsize): + ddp_preds = ddp_preds + preds[i + r] + ddp_targets = ddp_targets + targets[i + r] + ddp_kwargs_upd = { + k: B.cat([v[i + r] for r in range(worldsize)]).cpu() if isinstance(v, Tensor) else v + for k, v in (kwargs_update if fragment_kwargs else batch_kwargs_update).items() + } + + if input_order == INPUT_ORDER.PREDS_FIRST: + sk_batch_result = sk_metric(ddp_preds, ddp_targets, **ddp_kwargs_upd) + elif input_order == INPUT_ORDER.TARGETS_FIRST: + sk_batch_result = sk_metric(ddp_targets, ddp_preds, **ddp_kwargs_upd) + _assert_allclose(batch_result, sk_batch_result, atol=atol, key=key) + + elif check_batch and not metric.dist_sync_on_step: + batch_kwargs_update = { + k: v.cpu() if isinstance(v, Tensor) else v + for k, v in (batch_kwargs_update if fragment_kwargs else kwargs_update).items() + } + if input_order == INPUT_ORDER.PREDS_FIRST: + sk_batch_result = sk_metric(preds[i], targets[i], **batch_kwargs_update) + elif input_order == INPUT_ORDER.TARGETS_FIRST: + sk_batch_result = sk_metric(targets[i], preds[i], **batch_kwargs_update) + + 
_assert_allclose(batch_result, sk_batch_result, atol=atol, key=key) + + # check that metrics are hashable + assert hash(metric) + + # check on all batches on all ranks + result = metric.compute() + _assert_tensor(result, key=key) + + # Concatenation of Sequence of strings + total_preds = type(preds)() + total_targets = type(targets)() + for i in range(NUM_BATCHES): + total_preds = total_preds + preds[i] + total_targets = total_targets + targets[i] + total_kwargs_update = { + k: B.cat([v[i] for i in range(NUM_BATCHES)]).cpu() if isinstance(v, Tensor) else v + for k, v in kwargs_update.items() + } + if input_order == INPUT_ORDER.PREDS_FIRST: + sk_result = sk_metric(total_preds, total_targets, **total_kwargs_update) + elif input_order == INPUT_ORDER.TARGETS_FIRST: + sk_result = sk_metric(total_targets, total_preds, **total_kwargs_update) + + # assert after aggregation + _assert_allclose(result, sk_result, atol=atol, key=key) + + +def _functional_test( + preds: TEXT_METRIC_INPUT, + targets: TEXT_METRIC_INPUT, + metric_functional: Callable, + sk_metric: Callable, + metric_args: dict = None, + atol: float = 1e-8, + device: str = "cpu", + fragment_kwargs: bool = False, + input_order: INPUT_ORDER = INPUT_ORDER.PREDS_FIRST, + key: str = None, + **kwargs_update, +): + """Utility function doing the actual comparison between lightning functional metric and reference metric. + + Args: + preds: torch tensor with predictions + targets: torch tensor with targets + metric_functional: lightning metric functional that should be tested + sk_metric: callable function that is used for comparison + metric_args: dict with additional arguments used for class initialization + device: determine which device to run on, either 'cuda' or 'cpu' + fragment_kwargs: whether tensors in kwargs should be divided as `preds` and `targets` among processes + input_order: Define the ordering for the preds and targets positional arguments. + key: The key passed onto the `_assert_allclose` to compare the respective metric from the Dict output against + the sk_metric. + kwargs_update: Additional keyword arguments that will be passed with preds and + targets when running update on the metric. + """ + if not metric_args: + metric_args = {} + + metric = partial(metric_functional, **metric_args) + + # Move to device + kwargs_update = {k: v.to(device) if isinstance(v, Tensor) else v for k, v in kwargs_update.items()} + + for i in range(NUM_BATCHES): + extra_kwargs = {k: v[i] if isinstance(v, Tensor) else v for k, v in kwargs_update.items()} + if input_order == INPUT_ORDER.PREDS_FIRST: + lightning_result = metric(preds[i], targets[i], **extra_kwargs) + elif input_order == INPUT_ORDER.TARGETS_FIRST: + lightning_result = metric(targets[i], preds[i], **extra_kwargs) + + extra_kwargs = { + k: v.cpu() if isinstance(v, Tensor) else v + for k, v in (extra_kwargs if fragment_kwargs else kwargs_update).items() + } + if input_order == INPUT_ORDER.PREDS_FIRST: + sk_result = sk_metric(preds[i], targets[i], **extra_kwargs) + elif input_order == INPUT_ORDER.TARGETS_FIRST: + sk_result = sk_metric(targets[i], preds[i], **extra_kwargs) + + # assert its the same + _assert_allclose(lightning_result, sk_result, atol=atol, key=key) + + +def _assert_half_support( + metric_module: Metric, + metric_functional: Callable, + preds: TEXT_METRIC_INPUT, + targets: TEXT_METRIC_INPUT, + device: str = "cpu", + **kwargs_update, +): + """Test if an metric can be used with half precision tensors. 
+ + Args: + metric_module: the metric module to test + metric_functional: the metric functional to test + preds: torch tensor with predictions + targets: torch tensor with targets + device: determine device, either "cpu" or "cuda" + kwargs_update: Additional keyword arguments that will be passed with preds and + targets when running update on the metric. + """ + y_hat = preds[0] + y = targets[0] + kwargs_update = { + k: (v[0].half() if v.is_floating_point() else v[0]).to(device) if isinstance(v, Tensor) else v + for k, v in kwargs_update.items() + } + metric_module = metric_module.to(device) + _assert_tensor(metric_module(y_hat, y, **kwargs_update)) + _assert_tensor(metric_functional(y_hat, y, **kwargs_update)) + + +class TextTester(MetricTester): + """Class used for efficiently run alot of parametrized tests in ddp mode. Makes sure that ddp is only setup + once and that pool of processes are used for all tests. + + All tests for text metrics should subclass from this and implement a new method called `test_metric_name` where the + method `self.run_metric_test` is called inside. + """ + + def run_functional_metric_test( + self, + preds: TEXT_METRIC_INPUT, + targets: TEXT_METRIC_INPUT, + metric_functional: Callable, + sk_metric: Callable, + metric_args: dict = None, + fragment_kwargs: bool = False, + input_order: INPUT_ORDER = INPUT_ORDER.PREDS_FIRST, + key: str = None, + **kwargs_update, + ): + """Main method that should be used for testing functions. Call this inside testing method. + + Args: + preds: torch tensor with predictions + targets: torch tensor with targets + metric_functional: lightning metric class that should be tested + sk_metric: callable function that is used for comparison + metric_args: dict with additional arguments used for class initialization + fragment_kwargs: whether tensors in kwargs should be divided as `preds` and `targets` among processes + input_order: Define the ordering for the preds and targets positional arguments. + key: The key passed onto the `_assert_allclose` to compare the respective metric from the Dict output + against the sk_metric. + kwargs_update: Additional keyword arguments that will be passed with preds and + targets when running update on the metric. + """ + device = "cuda" if (B.cuda.is_available() and B.cuda.device_count() > 0) else "cpu" + + _functional_test( + preds=preds, + targets=targets, + metric_functional=metric_functional, + sk_metric=sk_metric, + metric_args=metric_args, + atol=self.atol, + device=device, + fragment_kwargs=fragment_kwargs, + input_order=input_order, + key=key, + **kwargs_update, + ) + + def run_class_metric_test( + self, + ddp: bool, + preds: TEXT_METRIC_INPUT, + targets: TEXT_METRIC_INPUT, + metric_class: Metric, + sk_metric: Callable, + dist_sync_on_step: bool, + metric_args: dict = None, + check_dist_sync_on_step: bool = True, + check_batch: bool = True, + fragment_kwargs: bool = False, + check_scriptable: bool = True, + input_order: INPUT_ORDER = INPUT_ORDER.PREDS_FIRST, + key: str = None, + **kwargs_update, + ): + """Main method that should be used for testing class. Call this inside testing methods. 
+ + Args: + ddp: bool, if running in ddp mode or not + preds: torch tensor with predictions + targets: torch tensor with targets + metric_class: lightning metric class that should be tested + sk_metric: callable function that is used for comparison + dist_sync_on_step: bool, if true will synchronize metric state across + processes at each ``forward()`` + metric_args: dict with additional arguments used for class initialization + check_dist_sync_on_step: bool, if true will check if the metric is also correctly + calculated per batch per device (and not just at the end) + check_batch: bool, if true will check if the metric is also correctly + calculated across devices for each batch (and not just at the end) + fragment_kwargs: whether tensors in kwargs should be divided as `preds` and `targets` among processes + input_order: Define the ordering for the preds and targets positional arguments. + key: The key passed onto the `_assert_allclose` to compare the respective metric from the Dict output + against the sk_metric. + kwargs_update: Additional keyword arguments that will be passed with preds and + targets when running update on the metric. + """ + if not metric_args: + metric_args = {} + if ddp: + if sys.platform == "win32": + pytest.skip("DDP not supported on windows") + + self.pool.starmap( + partial( + _class_test, + preds=preds, + targets=targets, + metric_class=metric_class, + sk_metric=sk_metric, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + check_dist_sync_on_step=check_dist_sync_on_step, + check_batch=check_batch, + atol=self.atol, + fragment_kwargs=fragment_kwargs, + check_scriptable=check_scriptable, + input_order=input_order, + key=key, + **kwargs_update, + ), + [(rank, self.poolSize) for rank in range(self.poolSize)], + ) + else: + device = "cuda" if (B.cuda.is_available() and B.cuda.device_count() > 0) else "cpu" + + _class_test( + rank=0, + worldsize=1, + preds=preds, + targets=targets, + metric_class=metric_class, + sk_metric=sk_metric, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + check_dist_sync_on_step=check_dist_sync_on_step, + check_batch=check_batch, + atol=self.atol, + device=device, + fragment_kwargs=fragment_kwargs, + check_scriptable=check_scriptable, + input_order=input_order, + key=key, + **kwargs_update, + ) + + @staticmethod + def run_precision_test_cpu( + preds: TEXT_METRIC_INPUT, + targets: TEXT_METRIC_INPUT, + metric_module: Metric, + metric_functional: Callable, + metric_args: dict = None, + **kwargs_update, + ): + """Test if a metric can be used with half precision tensors on cpu + Args: + preds: torch tensor with predictions + targets: torch tensor with targets + metric_module: the metric module to test + metric_functional: the metric functional to test + metric_args: dict with additional arguments used for class initialization + kwargs_update: Additional keyword arguments that will be passed with preds and + targets when running update on the metric. 
+ """ + metric_args = metric_args or {} + _assert_half_support( + metric_module(**metric_args), metric_functional, preds, targets, device="cpu", **kwargs_update + ) + + @staticmethod + def run_precision_test_gpu( + preds: TEXT_METRIC_INPUT, + targets: TEXT_METRIC_INPUT, + metric_module: Metric, + metric_functional: Callable, + metric_args: dict = None, + **kwargs_update, + ): + """Test if a metric can be used with half precision tensors on gpu + Args: + preds: torch tensor with predictions + targets: torch tensor with targets + metric_module: the metric module to test + metric_functional: the metric functional to test + metric_args: dict with additional arguments used for class initialization + kwargs_update: Additional keyword arguments that will be passed with preds and + targets when running update on the metric. + """ + metric_args = metric_args or {} + _assert_half_support( + metric_module(**metric_args), metric_functional, preds, targets, device="cuda", **kwargs_update + ) + + @staticmethod + def run_differentiability_test( + preds: TEXT_METRIC_INPUT, + targets: TEXT_METRIC_INPUT, + metric_module: Metric, + metric_functional: Callable, + metric_args: dict = None, + input_order: INPUT_ORDER = INPUT_ORDER.PREDS_FIRST, + key: str = None, + ): + """Test if a metric is differentiable or not. + + Args: + preds: torch tensor with predictions + targets: torch tensor with targets + metric_module: the metric module to test + metric_args: dict with additional arguments used for class initialization + input_order: Define the ordering for the preds and targets positional arguments. + key: The key passed onto the `_assert_allclose` to compare the respective metric from the Dict output + against the sk_metric. + """ + metric_args = metric_args or {} + # only floating point tensors can require grad + metric = metric_module(**metric_args) + if input_order == INPUT_ORDER.PREDS_FIRST: + out = metric(preds[0], targets[0]) + elif input_order == INPUT_ORDER.TARGETS_FIRST: + out = metric(targets[0], preds[0]) + + # Check if requires_grad matches is_differentiable attribute + _assert_requires_grad(metric, out, key=key) + + if metric.is_differentiable: + # check for numerical correctness + assert B.autograd.gradcheck(partial(metric_functional, **metric_args), (preds[0], targets[0])) diff --git a/RE/paddlemetric/src/tests/text/test_bertscore.py b/RE/paddlemetric/src/tests/text/test_bertscore.py new file mode 100644 index 00000000..68e51568 --- /dev/null +++ b/RE/paddlemetric/src/tests/text/test_bertscore.py @@ -0,0 +1,318 @@ +import os +from typing import Any, Dict, List + +import numpy as np +import pytest +import paddleext.torchapi as B +import paddleext.torchapi as B.distributed as dist +import paddleext.torchapi as B.multiprocessing as mp + +from paddlemetrics.functional import bert_score as metrics_bert_score +from paddlemetrics.text import BERTScore +from paddlemetrics.utilities.imports import _BERTSCORE_AVAILABLE + +if _BERTSCORE_AVAILABLE: + from bert_score import score as original_bert_score + +os.environ["TOKENIZERS_PARALLELISM"] = "1" + +# Examples and expected values taken from: +# https://github.com/Tiiiger/bert_score/blob/master/tests/test_scorer.py +preds = [ + "28-year-old chef found dead in San Francisco mall", + "A 28-year-old chef who recently moved to San Francisco was " + "found dead in the staircase of a local shopping center.", + "The victim's brother said he cannot imagine anyone who would want to harm him,\"Finally, it went uphill again at " + 'him."', +] +refs = [ + "28-Year-Old Chef 
Found Dead at San Francisco Mall", + "A 28-year-old chef who had recently moved to San Francisco was found dead in the stairwell of a local mall this " + "week.", + "But the victim's brother says he can't think of anyone who would want to hurt him, saying, \"Things were finally " + 'going well for him."', +] + + +_METRICS = ["precision", "recall", "f1"] + +MODEL_NAME = "albert-base-v2" + + +def _assert_list(preds: Any, refs: Any, threshold: float = 1e-8): + """Assert two lists are equal.""" + assert np.allclose(preds, refs, atol=threshold, equal_nan=True) + + +def _parse_original_bert_score(score: B.Tensor) -> Dict[str, List[float]]: + """Parse the BERT score returned by the original `bert-score` package.""" + score_dict = {metric: value.tolist() for metric, value in zip(_METRICS, score)} + return score_dict + + +preds_batched = [preds[0:2], preds[2:]] +refs_batched = [refs[0:2], refs[2:]] + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score_fn(preds, refs): + """Tests for functional.""" + original_score = original_bert_score(preds, refs, model_type=MODEL_NAME, num_layers=8, idf=False, batch_size=3) + original_score = _parse_original_bert_score(original_score) + + metrics_score = metrics_bert_score( + preds, refs, model_name_or_path=MODEL_NAME, num_layers=8, idf=False, batch_size=3 + ) + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score_fn_with_idf(preds, refs): + """Tests for functional with IDF rescaling.""" + original_score = original_bert_score(preds, refs, model_type=MODEL_NAME, num_layers=12, idf=True, batch_size=3) + original_score = _parse_original_bert_score(original_score) + + metrics_score = metrics_bert_score( + preds, refs, model_name_or_path=MODEL_NAME, num_layers=12, idf=True, batch_size=3 + ) + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score_fn_all_layers(preds, refs): + """Tests for functional and all layers.""" + original_score = original_bert_score(preds, refs, model_type=MODEL_NAME, all_layers=True, idf=False, batch_size=3) + original_score = _parse_original_bert_score(original_score) + + metrics_score = metrics_bert_score( + preds, refs, model_name_or_path=MODEL_NAME, all_layers=True, idf=False, batch_size=3 + ) + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score_fn_all_layers_with_idf(preds, refs): + """Tests for functional and all layers with IDF rescaling.""" + original_score = original_bert_score(preds, refs, model_type=MODEL_NAME, all_layers=True, idf=True, batch_size=3) + original_score = _parse_original_bert_score(original_score) + + metrics_score = metrics_bert_score( + preds, refs, model_name_or_path=MODEL_NAME, all_layers=True, idf=True, batch_size=3 + ) + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not 
_BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score_fn_all_layers_rescale_with_baseline(preds, refs): + """Tests for functional with baseline rescaling.""" + original_score = original_bert_score( + preds, + refs, + model_type=MODEL_NAME, + lang="en", + num_layers=8, + idf=False, + batch_size=3, + rescale_with_baseline=True, + ) + original_score = _parse_original_bert_score(original_score) + + metrics_score = metrics_bert_score( + preds, + refs, + model_name_or_path=MODEL_NAME, + lang="en", + num_layers=8, + idf=False, + batch_size=3, + rescale_with_baseline=True, + ) + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score_fn_rescale_with_baseline(preds, refs): + """Tests for functional with baseline rescaling with all layers.""" + original_score = original_bert_score( + preds, + refs, + model_type=MODEL_NAME, + lang="en", + all_layers=True, + idf=False, + batch_size=3, + rescale_with_baseline=True, + ) + original_score = _parse_original_bert_score(original_score) + + metrics_score = metrics_bert_score( + preds, + refs, + model_name_or_path=MODEL_NAME, + lang="en", + all_layers=True, + idf=False, + batch_size=3, + rescale_with_baseline=True, + ) + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score(preds, refs): + """Tests for metric.""" + original_score = original_bert_score(preds, refs, model_type=MODEL_NAME, num_layers=8, idf=False, batch_size=3) + original_score = _parse_original_bert_score(original_score) + + Scorer = BERTScore(model_name_or_path=MODEL_NAME, num_layers=8, idf=False, batch_size=3) + Scorer.update(predictions=preds, references=refs) + metrics_score = Scorer.compute() + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score_with_idf(preds, refs): + """Tests for metric with IDF rescaling.""" + original_score = original_bert_score(preds, refs, model_type=MODEL_NAME, num_layers=8, idf=True, batch_size=3) + original_score = _parse_original_bert_score(original_score) + + Scorer = BERTScore(model_name_or_path=MODEL_NAME, num_layers=8, idf=True, batch_size=3) + Scorer.update(predictions=preds, references=refs) + metrics_score = Scorer.compute() + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score_all_layers(preds, refs): + """Tests for metric and all layers.""" + original_score = original_bert_score(preds, refs, model_type=MODEL_NAME, all_layers=True, idf=False, batch_size=3) + original_score = _parse_original_bert_score(original_score) + + Scorer = BERTScore(model_name_or_path=MODEL_NAME, all_layers=True, idf=False, batch_size=3) + Scorer.update(predictions=preds, references=refs) + metrics_score = Scorer.compute() + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, 
refs)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_score_all_layers_with_idf(preds, refs): + """Tests for metric and all layers with IDF rescaling.""" + original_score = original_bert_score(preds, refs, model_type=MODEL_NAME, all_layers=True, idf=True, batch_size=3) + original_score = _parse_original_bert_score(original_score) + + Scorer = BERTScore(model_name_or_path=MODEL_NAME, all_layers=True, idf=True, batch_size=3) + Scorer.update(predictions=preds, references=refs) + metrics_score = Scorer.compute() + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds_batched, refs_batched)], +) +@pytest.mark.skipif(not _BERTSCORE_AVAILABLE, reason="test requires bert_score") +def test_accumulation(preds, refs): + """Tests for metric works with accumulation.""" + original_score = original_bert_score( + sum(preds, []), sum(refs, []), model_type=MODEL_NAME, num_layers=8, idf=False, batch_size=3 + ) + original_score = _parse_original_bert_score(original_score) + + Scorer = BERTScore(model_name_or_path=MODEL_NAME, num_layers=8, idf=False, batch_size=3) + for p, r in zip(preds, refs): + Scorer.update(predictions=p, references=r) + metrics_score = Scorer.compute() + + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + + +def _bert_score_ddp(rank, world_size, preds, refs, original_score): + """Define a DDP process for BERTScore.""" + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "12355" + dist.init_process_group("gloo", rank=rank, world_size=world_size) + Scorer = BERTScore(model_name_or_path=MODEL_NAME, num_layers=8, idf=False, batch_size=3, max_length=128) + Scorer.update(preds, refs) + metrics_score = Scorer.compute() + for metric in _METRICS: + _assert_list(metrics_score[metric], original_score[metric]) + dist.destroy_process_group() + + +def _test_score_ddp_fn(rank, world_size, preds, refs): + """Core functionality for the `test_score_ddp` test.""" + original_score = original_bert_score(preds, refs, model_type=MODEL_NAME, num_layers=8, idf=False, batch_size=3) + original_score = _parse_original_bert_score(original_score) + _bert_score_ddp(rank, world_size, preds, refs, original_score) + + +@pytest.mark.parametrize( + "preds,refs", + [(preds, refs)], +) +@pytest.mark.skipif(not (_BERTSCORE_AVAILABLE and dist.is_available()), reason="test requires bert_score") +def test_score_ddp(preds, refs): + """Tests for metric using DDP.""" + world_size = 2 + mp.spawn(_test_score_ddp_fn, args=(world_size, preds, refs), nprocs=world_size, join=False) diff --git a/RE/paddlemetric/src/tests/text/test_bleu.py b/RE/paddlemetric/src/tests/text/test_bleu.py new file mode 100644 index 00000000..168dc760 --- /dev/null +++ b/RE/paddlemetric/src/tests/text/test_bleu.py @@ -0,0 +1,141 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import partial + +import pytest +from nltk.translate.bleu_score import SmoothingFunction, corpus_bleu +from paddleext.torchapi import tensor + +from tests.text.helpers import INPUT_ORDER, TextTester +from paddlemetrics.functional.text.bleu import bleu_score +from paddlemetrics.text.bleu import BLEUScore + +# example taken from +# https://www.nltk.org/api/nltk.translate.html?highlight=bleu%20score#nltk.translate.bleu_score.corpus_bleu +# EXAMPLE 1 +HYPOTHESIS_A = tuple( + "It is a guide to action which ensures that the military always obeys the commands of the party".split() +) +REFERENCE_1A = tuple("It is a guide to action that ensures that the military will forever heed Party commands".split()) +REFERENCE_2A = tuple( + "It is a guiding principle which makes the military forces always being under the command of the Party".split() +) +REFERENCE_3A = tuple("It is the practical guide for the army always to heed the directions of the party".split()) + +# EXAMPLE 2 +HYPOTHESIS_B = tuple("he read the book because he was interested in world history".split()) +REFERENCE_1B = tuple("he was interested in world history because he read the book".split()) + +# EXAMPLE 3 +HYPOTHESIS_C = tuple("the cat the cat on the mat".split()) +REFERENCE_1C = tuple("the cat is on the mat".split()) +REFERENCE_2C = tuple("there is a cat on the mat".split()) + +TUPLE_OF_REFERENCES = ( + ((REFERENCE_1A, REFERENCE_2A, REFERENCE_3A), tuple([REFERENCE_1B])), + (tuple([REFERENCE_1B]), (REFERENCE_1C, REFERENCE_2C)), +) +TUPLE_OF_HYPOTHESES = ((HYPOTHESIS_A, HYPOTHESIS_B), (HYPOTHESIS_B, HYPOTHESIS_C)) + +BATCHES = {"preds": TUPLE_OF_HYPOTHESES, "targets": TUPLE_OF_REFERENCES} + +# https://www.nltk.org/api/nltk.translate.html?highlight=bleu%20score#nltk.translate.bleu_score.SmoothingFunction +smooth_func = SmoothingFunction().method2 + + +@pytest.mark.parametrize( + ["weights", "n_gram", "smooth_func", "smooth"], + [ + pytest.param([1], 1, None, False), + pytest.param([0.5, 0.5], 2, smooth_func, True), + pytest.param([0.333333, 0.333333, 0.333333], 3, None, False), + pytest.param([0.25, 0.25, 0.25, 0.25], 4, smooth_func, True), + ], +) +@pytest.mark.parametrize( + ["preds", "targets"], + [ + pytest.param(BATCHES["preds"], BATCHES["targets"]), + ], +) +class TestBLEUScore(TextTester): + @pytest.mark.parametrize("ddp", [False, True]) + @pytest.mark.parametrize("dist_sync_on_step", [False, True]) + def test_bleu_score_class(self, ddp, dist_sync_on_step, preds, targets, weights, n_gram, smooth_func, smooth): + metric_args = {"n_gram": n_gram, "smooth": smooth} + + nltk_metric = partial(corpus_bleu, weights=weights, smoothing_function=smooth_func) + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + targets=targets, + metric_class=BLEUScore, + sk_metric=nltk_metric, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + input_order=INPUT_ORDER.TARGETS_FIRST, + ) + + def test_bleu_score_functional(self, preds, targets, weights, n_gram, smooth_func, smooth): + metric_args = {"n_gram": n_gram, "smooth": smooth} + nltk_metric = partial(corpus_bleu, weights=weights, smoothing_function=smooth_func) + + self.run_functional_metric_test( + preds, + targets, + metric_functional=bleu_score, + sk_metric=nltk_metric, + metric_args=metric_args, + input_order=INPUT_ORDER.TARGETS_FIRST, + ) + + def test_bleu_score_differentiability(self, preds, targets, weights, n_gram, smooth_func, smooth): + metric_args = {"n_gram": n_gram, "smooth": smooth} + + self.run_differentiability_test( + preds=preds, + 
targets=targets, + metric_module=BLEUScore, + metric_functional=bleu_score, + metric_args=metric_args, + input_order=INPUT_ORDER.TARGETS_FIRST, + ) + + +def test_bleu_empty_functional(): + hyp = [[]] + ref = [[[]]] + assert bleu_score(ref, hyp) == tensor(0.0) + + +def test_no_4_gram_functional(): + hyps = [["My", "full", "pytorch-lightning"]] + refs = [[["My", "full", "pytorch-lightning", "test"], ["Completely", "Different"]]] + assert bleu_score(refs, hyps) == tensor(0.0) + + +def test_bleu_empty_class(): + bleu = BLEUScore() + hyp = [[]] + ref = [[[]]] + assert bleu(ref, hyp) == tensor(0.0) + + +def test_no_4_gram_class(): + bleu = BLEUScore() + hyps = [["My", "full", "pytorch-lightning"]] + refs = [[["My", "full", "pytorch-lightning", "test"], ["Completely", "Different"]]] + assert bleu(refs, hyps) == tensor(0.0) diff --git a/RE/paddlemetric/src/tests/text/test_rouge.py b/RE/paddlemetric/src/tests/text/test_rouge.py new file mode 100644 index 00000000..4696dcee --- /dev/null +++ b/RE/paddlemetric/src/tests/text/test_rouge.py @@ -0,0 +1,147 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import partial +from typing import List + +import pytest + +from tests.text.helpers import INPUT_ORDER, TextTester +from paddlemetrics.functional.text.rouge import rouge_score +from paddlemetrics.text.rouge import ROUGEScore +from paddlemetrics.utilities.imports import _NLTK_AVAILABLE, _ROUGE_SCORE_AVAILABLE + +if _ROUGE_SCORE_AVAILABLE: + from rouge_score.rouge_scorer import RougeScorer + from rouge_score.scoring import BootstrapAggregator +else: + RougeScorer, BootstrapAggregator = object, object + +ROUGE_KEYS = ("rouge1", "rouge2", "rougeL", "rougeLsum") + +SINGLE_SENTENCE_EXAMPLE_PREDS = "The quick brown fox jumps over the lazy dog" +SINGLE_SENTENCE_EXAMPLE_TARGET = "The quick brown dog jumps on the log." 
+ +PREDS = "My name is John" +TARGETS = "Is your name John" + + +BATCHES_1 = { + "preds": [["the cat was under the bed"], ["the cat was found under the bed"]], + "targets": [["the cat was found under the bed"], ["the tiny little cat was found under the big funny bed "]], +} + + +BATCHES_2 = { + "preds": [["The quick brown fox jumps over the lazy dog"], ["My name is John"]], + "targets": [["The quick brown dog jumps on the log."], ["Is your name John"]], +} + + +def _compute_rouge_score(preds: List[str], targets: List[str], use_stemmer: bool, rouge_level: str, metric: str): + if isinstance(preds, str): + preds = [preds] + if isinstance(targets, str): + targets = [targets] + scorer = RougeScorer(ROUGE_KEYS, use_stemmer=use_stemmer) + aggregator = BootstrapAggregator() + for pred, target in zip(preds, targets): + aggregator.add_scores(scorer.score(target, pred)) + rs_scores = aggregator.aggregate() + rs_result = getattr(rs_scores[rouge_level].mid, metric) + return rs_result + + +@pytest.mark.skipif(not _NLTK_AVAILABLE, reason="test requires nltk") +@pytest.mark.parametrize( + ["pl_rouge_metric_key", "use_stemmer"], + [ + pytest.param("rouge1_precision", True), + pytest.param("rouge1_recall", True), + pytest.param("rouge1_fmeasure", False), + pytest.param("rouge2_precision", False), + pytest.param("rouge2_recall", True), + pytest.param("rouge2_fmeasure", True), + pytest.param("rougeL_precision", False), + pytest.param("rougeL_recall", False), + pytest.param("rougeL_fmeasure", True), + pytest.param("rougeLsum_precision", True), + pytest.param("rougeLsum_recall", False), + pytest.param("rougeLsum_fmeasure", False), + ], +) +@pytest.mark.parametrize( + ["preds", "targets"], + [ + pytest.param(BATCHES_1["preds"], BATCHES_1["targets"]), + pytest.param(BATCHES_2["preds"], BATCHES_2["targets"]), + ], +) +class TestROUGEScore(TextTester): + @pytest.mark.parametrize("ddp", [False, True]) + @pytest.mark.parametrize("dist_sync_on_step", [False, True]) + def test_rouge_score_class(self, ddp, dist_sync_on_step, preds, targets, pl_rouge_metric_key, use_stemmer): + metric_args = {"use_stemmer": use_stemmer} + + rouge_level, metric = pl_rouge_metric_key.split("_") + rouge_metric = partial(_compute_rouge_score, use_stemmer=use_stemmer, rouge_level=rouge_level, metric=metric) + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + targets=targets, + metric_class=ROUGEScore, + sk_metric=rouge_metric, + dist_sync_on_step=dist_sync_on_step, + metric_args=metric_args, + input_order=INPUT_ORDER.PREDS_FIRST, + key=pl_rouge_metric_key, + ) + + def test_rouge_score_functional(self, preds, targets, pl_rouge_metric_key, use_stemmer): + metric_args = {"use_stemmer": use_stemmer} + + rouge_level, metric = pl_rouge_metric_key.split("_") + rouge_metric = partial(_compute_rouge_score, use_stemmer=use_stemmer, rouge_level=rouge_level, metric=metric) + + self.run_functional_metric_test( + preds, + targets, + metric_functional=rouge_score, + sk_metric=rouge_metric, + metric_args=metric_args, + input_order=INPUT_ORDER.PREDS_FIRST, + key=pl_rouge_metric_key, + ) + + +def test_rouge_metric_raises_errors_and_warnings(): + """Test that expected warnings and errors are raised.""" + if not _NLTK_AVAILABLE: + with pytest.raises( + ValueError, + match="ROUGE metric requires that nltk is installed." 
+            " Either as `pip install paddlemetrics[text]` or `pip install nltk`",
+        ):
+            ROUGEScore()
+
+
+def test_rouge_metric_wrong_key_value_error():
+    key = ("rouge1", "rouge")
+
+    with pytest.raises(ValueError):
+        ROUGEScore(rouge_keys=key)
+
+    with pytest.raises(ValueError):
+        rouge_score(PREDS, TARGETS, rouge_keys=key)
diff --git a/RE/paddlemetric/src/tests/text/test_sacre_bleu.py b/RE/paddlemetric/src/tests/text/test_sacre_bleu.py
new file mode 100644
index 00000000..289a7d70
--- /dev/null
+++ b/RE/paddlemetric/src/tests/text/test_sacre_bleu.py
@@ -0,0 +1,73 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+import paddleext.torchapi as B
+
+from tests.text.helpers import TextTester
+from paddlemetrics.functional.text.sacre_bleu import sacre_bleu_score
+from paddlemetrics.text.sacre_bleu import SacreBLEUScore
+from paddlemetrics.utilities.imports import _SACREBLEU_AVAILABLE
+
+if _SACREBLEU_AVAILABLE:
+    from sacrebleu.metrics import BLEU
+
+# example taken from https://github.com/mjpost/sacrebleu
+REFERENCES = (
+    # First set of references
+    ("The dog bit the man.", "It was not unexpected.", "The man bit him first."),
+    # Second set of references
+    ("The dog had bit the man.", "No one was surprised.", "The man had bitten the dog."),
+)
+
+HYPOTHESES = ("The dog bit the man.", "It wasn't surprising.", "The man had just bitten him.")
+
+TOKENIZERS = ("none", "13a", "zh", "intl", "char")
+
+ROUND_N_DIGITS = 4
+
+
+def metrics_score_fn(targets, preds, tokenize):
+    metrics_score = sacre_bleu_score(targets, preds, tokenize=tokenize)
+    # rescale to 0-100 and round to 4 decimals to match sacrebleu
+    metrics_score_normed = B.round(100 * metrics_score * 10 ** ROUND_N_DIGITS) / 10 ** ROUND_N_DIGITS
+    return metrics_score_normed
+
+
+@pytest.mark.parametrize(
+    ["preds", "targets"],
+    [
+        (HYPOTHESES, REFERENCES),
+    ],
+)
+@pytest.mark.parametrize("tokenize", TOKENIZERS)
+@pytest.mark.skipif(not _SACREBLEU_AVAILABLE, reason="test requires sacrebleu")
+class TestSacreBLEUScore(TextTester):
+    def test_sacrebleu_score_functional(self, preds, targets, tokenize):
+        sacrebleu_metrics = BLEU(tokenize=tokenize)
+        original_score = B.tensor(round(sacrebleu_metrics.corpus_score(preds, targets).score, ROUND_N_DIGITS))
+
+        metrics_targets = [[ref[i] for ref in targets] for i in range(len(targets[0]))]
+        metrics_score = metrics_score_fn(metrics_targets, preds, tokenize)
+        assert metrics_score == original_score
+
+    def test_sacrebleu_score_metrics(self, preds, targets, tokenize):
+        sacrebleu_metrics = BLEU(tokenize=tokenize)
+        original_score = B.tensor(round(sacrebleu_metrics.corpus_score(preds, targets).score, ROUND_N_DIGITS))
+
+        metrics_targets = [[ref[i] for ref in targets] for i in range(len(targets[0]))]
+        tm_metrics = SacreBLEUScore(tokenize=tokenize)
+        tm_metrics.update(metrics_targets, preds)
+        metrics_score = B.round(100 * tm_metrics.compute() * 10 ** ROUND_N_DIGITS) / 10 ** ROUND_N_DIGITS
+        assert metrics_score == original_score
diff --git a/RE/paddlemetric/src/tests/text/test_wer.py
b/RE/paddlemetric/src/tests/text/test_wer.py new file mode 100644 index 00000000..65a7ca8b --- /dev/null +++ b/RE/paddlemetric/src/tests/text/test_wer.py @@ -0,0 +1,75 @@ +from typing import Callable, List, Union + +import pytest + +from tests.text.helpers import INPUT_ORDER, TextTester +from paddlemetrics.utilities.imports import _JIWER_AVAILABLE + +if _JIWER_AVAILABLE: + from jiwer import compute_measures +else: + compute_measures = Callable + +from paddlemetrics.functional.text.wer import wer +from paddlemetrics.text.wer import WER + +BATCHES_1 = {"preds": [["hello world"], ["what a day"]], "targets": [["hello world"], ["what a wonderful day"]]} + +BATCHES_2 = { + "preds": [ + ["i like python", "what you mean or swallow"], + ["hello duck", "i like python"], + ], + "targets": [ + ["i like monthy python", "what do you mean, african or european swallow"], + ["hello world", "i like monthy python"], + ], +} + + +def _compute_wer_metric_jiwer(prediction: Union[str, List[str]], reference: Union[str, List[str]]): + return compute_measures(reference, prediction)["wer"] + + +@pytest.mark.skipif(not _JIWER_AVAILABLE, reason="test requires jiwer") +@pytest.mark.parametrize( + ["preds", "targets"], + [ + pytest.param(BATCHES_1["preds"], BATCHES_1["targets"]), + pytest.param(BATCHES_2["preds"], BATCHES_2["targets"]), + ], +) +class TestWER(TextTester): + @pytest.mark.parametrize("ddp", [False, True]) + @pytest.mark.parametrize("dist_sync_on_step", [False, True]) + def test_wer_class(self, ddp, dist_sync_on_step, preds, targets): + + self.run_class_metric_test( + ddp=ddp, + preds=preds, + targets=targets, + metric_class=WER, + sk_metric=_compute_wer_metric_jiwer, + dist_sync_on_step=dist_sync_on_step, + input_order=INPUT_ORDER.PREDS_FIRST, + ) + + def test_wer_functional(self, preds, targets): + + self.run_functional_metric_test( + preds, + targets, + metric_functional=wer, + sk_metric=_compute_wer_metric_jiwer, + input_order=INPUT_ORDER.PREDS_FIRST, + ) + + def test_wer_differentiability(self, preds, targets): + + self.run_differentiability_test( + preds=preds, + targets=targets, + metric_module=WER, + metric_functional=wer, + input_order=INPUT_ORDER.PREDS_FIRST, + ) diff --git a/RE/paddlemetric/src/tests/wrappers/__init__.py b/RE/paddlemetric/src/tests/wrappers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/RE/paddlemetric/src/tests/wrappers/test_bootstrapping.py b/RE/paddlemetric/src/tests/wrappers/test_bootstrapping.py new file mode 100644 index 00000000..ec74c4bf --- /dev/null +++ b/RE/paddlemetric/src/tests/wrappers/test_bootstrapping.py @@ -0,0 +1,123 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import operator +from functools import partial + +import numpy as np +import pytest +import paddleext.torchapi as B +from sklearn.metrics import mean_squared_error, precision_score, recall_score +from paddleext.torchapi import Tensor + +from tests.helpers import seed_all +from paddlemetrics import MeanSquaredError, Precision, Recall +from paddlemetrics.utilities import apply_to_collection +from paddlemetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_7 +from paddlemetrics.wrappers.bootstrapping import BootStrapper, _bootstrap_sampler + +seed_all(42) + +_preds = B.randint(10, (10, 32)) +_target = B.randint(10, (10, 32)) + + +class TestBootStrapper(BootStrapper): + """For testing purpose, we subclass the bootstrapper class so we can get the exact permutation the class is + creating.""" + + def update(self, *args) -> None: + self.out = [] + for idx in range(self.num_bootstraps): + size = len(args[0]) + sample_idx = _bootstrap_sampler(size, sampling_strategy=self.sampling_strategy).to(self.device) + new_args = apply_to_collection(args, Tensor, B.index_select, dim=0, index=sample_idx) + self.metrics[idx].update(*new_args) + self.out.append(new_args) + + +def _sample_checker(old_samples, new_samples, op: operator, threshold: int): + found_one = False + for os in old_samples: + cond = op(os, new_samples) + if cond.sum() > threshold: + found_one = True + break + return found_one + + +@pytest.mark.parametrize("sampling_strategy", ["poisson", "multinomial"]) +def test_bootstrap_sampler(sampling_strategy): + """make sure that the bootstrap sampler works as intended.""" + old_samples = B.randn(20, 2) + + # make sure that the new samples are only made up of old samples + idx = _bootstrap_sampler(20, sampling_strategy=sampling_strategy) + new_samples = old_samples[idx] + for ns in new_samples: + assert ns in old_samples + + found_one = _sample_checker(old_samples, new_samples, operator.eq, 2) + assert found_one, "resampling did not work because no samples were sampled twice" + + found_zero = _sample_checker(old_samples, new_samples, operator.ne, 0) + assert found_zero, "resampling did not work because all samples were atleast sampled once" + + +@pytest.mark.parametrize("device", ["cpu", "cuda"]) +@pytest.mark.parametrize("sampling_strategy", ["poisson", "multinomial"]) +@pytest.mark.parametrize( + "metric, sk_metric", + [ + [Precision(average="micro"), partial(precision_score, average="micro")], + [Recall(average="micro"), partial(recall_score, average="micro")], + [MeanSquaredError(), mean_squared_error], + ], +) +def test_bootstrap(device, sampling_strategy, metric, sk_metric): + """Test that the different bootstraps gets updated as we expected and that the compute method works.""" + if device == "cuda" and not B.cuda.is_available(): + pytest.skip("Test with device='cuda' requires gpu") + + _kwargs = {"base_metric": metric, "mean": True, "std": True, "raw": True, "sampling_strategy": sampling_strategy} + if _TORCH_GREATER_EQUAL_1_7: + _kwargs.update(dict(quantile=B.tensor([0.05, 0.95], device=device))) + + bootstrapper = TestBootStrapper(**_kwargs) + bootstrapper.to(device) + + collected_preds = [[] for _ in range(10)] + collected_target = [[] for _ in range(10)] + for p, t in zip(_preds, _target): + p, t = p.to(device), t.to(device) + bootstrapper.update(p, t) + + for i, o in enumerate(bootstrapper.out): + + collected_preds[i].append(o[0]) + collected_target[i].append(o[1]) + + collected_preds = [B.cat(cp).cpu() for cp in collected_preds] + collected_target = [B.cat(ct).cpu() for ct in 
collected_target] + + sk_scores = [sk_metric(ct, cp) for ct, cp in zip(collected_target, collected_preds)] + + output = bootstrapper.compute() + # quantile only avaible for pytorch v1.7 and forward + if _TORCH_GREATER_EQUAL_1_7: + assert np.allclose(output["quantile"][0].cpu(), np.quantile(sk_scores, 0.05)) + assert np.allclose(output["quantile"][1].cpu(), np.quantile(sk_scores, 0.95)) + + assert np.allclose(output["mean"].cpu(), np.mean(sk_scores)) + assert np.allclose(output["std"].cpu(), np.std(sk_scores, ddof=1)) + assert np.allclose(output["raw"].cpu(), sk_scores) diff --git a/RE/paddlemetric/src/tests/wrappers/test_multioutput.py b/RE/paddlemetric/src/tests/wrappers/test_multioutput.py new file mode 100644 index 00000000..421dd722 --- /dev/null +++ b/RE/paddlemetric/src/tests/wrappers/test_multioutput.py @@ -0,0 +1,142 @@ +from collections import namedtuple +from functools import partial +from typing import Any, Callable, Optional + +import pytest +import paddleext.torchapi as B +from sklearn.metrics import accuracy_score +from sklearn.metrics import r2_score as sk_r2score + +from tests.helpers import seed_all +from tests.helpers.testers import BATCH_SIZE, NUM_BATCHES, NUM_CLASSES, MetricTester +from paddlemetrics import Metric +from paddlemetrics.classification import Accuracy +from paddlemetrics.regression import R2Score +from paddlemetrics.wrappers.multioutput import MultioutputWrapper + +seed_all(42) + + +class _MultioutputMetric(Metric): + """Test class that allows passing base metric as a class rather than its instantiation to the wrapper.""" + + def __init__( + self, + base_metric_class, + num_outputs: int = 1, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Any = None, + dist_sync_fn: Optional[Callable] = None, + **base_metric_kwargs, + ) -> None: + super().__init__( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + ) + self.metric = MultioutputWrapper( + base_metric_class( + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + process_group=process_group, + dist_sync_fn=dist_sync_fn, + **base_metric_kwargs, + ), + num_outputs=num_outputs, + compute_on_step=compute_on_step, + dist_sync_on_step=dist_sync_on_step, + dist_sync_fn=dist_sync_fn, + ) + + def update(self, preds: B.Tensor, target: B.Tensor) -> None: + """Update the each pair of outputs and predictions.""" + return self.metric.update(preds, target) + + def compute(self) -> B.Tensor: + """Compute the R2 score between each pair of outputs and predictions.""" + return self.metric.compute() + + @B.jit.unused + def forward(self, *args, **kwargs): + """Run forward on the underlying metric.""" + return self.metric(*args, **kwargs) + + def reset(self) -> None: + """Reset the underlying metric state.""" + self.metric.reset() + + +num_targets = 2 + +Input = namedtuple("Input", ["preds", "target"]) + +_multi_target_regression_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), + target=B.rand(NUM_BATCHES, BATCH_SIZE, num_targets), +) +_multi_target_classification_inputs = Input( + preds=B.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, num_targets), + target=B.randint(NUM_CLASSES, (NUM_BATCHES, BATCH_SIZE, num_targets)), +) + + +def _multi_target_sk_r2score(preds, target, adjusted=0, multioutput="raw_values"): + """Compute R2 score over multiple outputs.""" + sk_preds = preds.view(-1, num_targets).numpy() + sk_target = target.view(-1, num_targets).numpy() + r2_score = 
sk_r2score(sk_target, sk_preds, multioutput=multioutput) + if adjusted != 0: + r2_score = 1 - (1 - r2_score) * (sk_preds.shape[0] - 1) / (sk_preds.shape[0] - adjusted - 1) + return r2_score + + +def _multi_target_sk_accuracy(preds, target, num_outputs): + """Compute accuracy over multiple outputs.""" + accs = [] + for i in range(num_outputs): + accs.append(accuracy_score(B.argmax(preds[:, :, i], dim=1), target[:, i])) + return accs + + +@pytest.mark.parametrize( + "base_metric_class, compare_metric, preds, target, num_outputs, metric_kwargs", + [ + ( + R2Score, + _multi_target_sk_r2score, + _multi_target_regression_inputs.preds, + _multi_target_regression_inputs.target, + num_targets, + {}, + ), + ( + Accuracy, + partial(_multi_target_sk_accuracy, num_outputs=2), + _multi_target_classification_inputs.preds, + _multi_target_classification_inputs.target, + num_targets, + dict(num_classes=NUM_CLASSES), + ), + ], +) +class TestMultioutputWrapper(MetricTester): + """Test the MultioutputWrapper class with regression and classification inner metrics.""" + + @pytest.mark.parametrize("ddp", [True, False]) + @pytest.mark.parametrize("dist_sync_on_step", [True, False]) + def test_multioutput_wrapper( + self, base_metric_class, compare_metric, preds, target, num_outputs, metric_kwargs, ddp, dist_sync_on_step + ): + """Test that the multioutput wrapper properly slices and computes outputs along the output dimension for + both classification and regression metrics.""" + self.run_class_metric_test( + ddp, + preds, + target, + _MultioutputMetric, + compare_metric, + dist_sync_on_step, + metric_args=dict(num_outputs=num_outputs, base_metric_class=base_metric_class, **metric_kwargs), + ) diff --git a/RE/paddlemetric/src/tests/wrappers/test_tracker.py b/RE/paddlemetric/src/tests/wrappers/test_tracker.py new file mode 100644 index 00000000..07a94eea --- /dev/null +++ b/RE/paddlemetric/src/tests/wrappers/test_tracker.py @@ -0,0 +1,76 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from functools import partial + +import pytest +import paddleext.torchapi as B + +from tests.helpers import seed_all +from paddlemetrics import Accuracy, MeanAbsoluteError, MeanSquaredError, Precision, Recall +from paddlemetrics.wrappers import MetricTracker + +seed_all(42) + + +def test_raises_error_on_wrong_input(): + with pytest.raises(TypeError, match="metric arg need to be an instance of a paddlemetrics metric .*"): + MetricTracker([1, 2, 3]) + + +@pytest.mark.parametrize( + "method, method_input", + [ + ("update", (B.randint(10, (50,)), B.randint(10, (50,)))), + ("forward", (B.randint(10, (50,)), B.randint(10, (50,)))), + ("compute", None), + ], +) +def test_raises_error_if_increment_not_called(method, method_input): + tracker = MetricTracker(Accuracy(num_classes=10)) + with pytest.raises(ValueError, match=f"`{method}` cannot be called before .*"): + if method_input is not None: + getattr(tracker, method)(*method_input) + else: + getattr(tracker, method)() + + +@pytest.mark.parametrize( + "base_metric, metric_input, maximize", + [ + (partial(Accuracy, num_classes=10), (B.randint(10, (50,)), B.randint(10, (50,))), True), + (partial(Precision, num_classes=10), (B.randint(10, (50,)), B.randint(10, (50,))), True), + (partial(Recall, num_classes=10), (B.randint(10, (50,)), B.randint(10, (50,))), True), + (MeanSquaredError, (B.randn(50), B.randn(50)), False), + (MeanAbsoluteError, (B.randn(50), B.randn(50)), False), + ], +) +def test_tracker(base_metric, metric_input, maximize): + tracker = MetricTracker(base_metric(), maximize=maximize) + for i in range(5): + tracker.increment() + # check both update and forward works + for _ in range(5): + tracker.update(*metric_input) + for _ in range(5): + tracker(*metric_input) + + val = tracker.compute() + assert val != 0.0 + assert tracker.n_steps == i + 1 + + assert tracker.n_steps == 5 + assert tracker.compute_all().shape[0] == 5 + val, idx = tracker.best_metric(return_step=True) + assert val != 0.0 + assert idx in list(range(5)) diff --git a/README.md b/README.md index b2b936ee..30fcc4c1 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ * [自然语言处理(Natrual Language Processing)](#自然语言处理) * [知识图谱(Knowledge Graph)](#知识图谱) * [时空数据挖掘(Spatial-Temporal Data-Mining)](#时空数据挖掘) +* [研发效率(Research Efficiency)](#研发效率) ## 计算机视觉 | 任务类型 | 目录 | 简介 | 论文链接 | @@ -68,6 +69,11 @@ | 兴趣点生成 |[P3AC](ST_DM/KDD2020-P3AC)| 具备个性化的前缀嵌入的POI自动生成。 | - | | 区域生成 |[P3AC](ST_DM/GenRegion)| 基于路网进行区域划分的方法, 实现对特定区域基于路网的全划分,区域之间无交叠,无空隙,算法支持对全球的区域划分。| - | +## 研发效率 +| 软件名称 | 目录 | 简介 | +|--------------|------------------------|-------------------------------------------------| +| paddleext | [paddleext](RE/paddleext) | paddle的扩展功能插件,可以让部分pytorch code 无缝运行在paddle平台上。 | +| paddlemetric | [paddlemetric](RE/paddlemetric) | torchmetric 的 paddle迁移版本,目前支持分类测度。 | ## 许可证书 此向导由[PaddlePaddle](https://github.com/PaddlePaddle/Paddle)贡献,受[Apache-2.0 license](LICENSE)许可认证。
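As a quick orientation for the new `RE` packages, here is a minimal usage sketch distilled from the test suites above (for example `test_tracker.py`): it only assumes that `paddleext` and `paddlemetrics` are importable, and it mirrors the classification-metric calls made in those tests rather than documenting a definitive API.

```python
# Minimal sketch based on the tests above; assumes paddleext and paddlemetrics are on PYTHONPATH.
import paddleext.torchapi as B      # paddle backend exposed through the torch-style API
from paddlemetrics import Accuracy

metric = Accuracy(num_classes=10)   # classification metric, as instantiated in test_tracker.py
preds = B.randint(10, (50,))        # fake integer predictions, matching the test inputs
target = B.randint(10, (50,))       # fake integer labels
metric.update(preds, target)        # accumulate statistics for one batch
print(metric.compute())             # aggregate accuracy over all accumulated batches
```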