From a9b812b55ca265b8f26fee6c583b80129849368d Mon Sep 17 00:00:00 2001 From: Zachary Coleman <42484306+zachcoleman@users.noreply.github.com> Date: Tue, 31 May 2022 21:53:57 -0500 Subject: [PATCH] Additional Helper Functions (#15) * updating project metadata * fixing CI yaml * using venv * trying again * redoing CI * fixing tests * changing some settings * updates * fixing build * trying to fix this * fixing release * bumping version * better code organization * updates * adding initial cm impl * adding unique, dispatching pattern to Py objects, renaming ext * rustfmt * cm dispatched * rustfmt * tests and benchmarks added * bump version * 100% test coverage * updating readme * Threading enabled (#9) * bumping version * major refactor leveraging macros * bumping version and updating test * adding executed notebook * fixing performance w/ bool * multiclass implemented and tested ready for 1.0.0 * shifting to u32 for numpy compatability * bumping version * changing to i64 for better compatability * Major additions to Python API to include high-level helpers --- Cargo.toml | 2 +- README.md | 2 +- .../timeit.ipynb => examples/benchmarks.ipynb | 0 examples/stats.ipynb | 144 ++++++++++++++++ .../threading.ipynb | 0 fast_stats/__init__.py | 10 +- fast_stats/binary.py | 73 +++++++- fast_stats/multiclass.py | 96 ++++++++++- pyproject.toml | 3 +- tests/test_binary.py | 79 +++++++++ tests/test_multiclass.py | 160 ++++++++++++++++++ 11 files changed, 562 insertions(+), 7 deletions(-) rename benchmarks/timeit.ipynb => examples/benchmarks.ipynb (100%) create mode 100644 examples/stats.ipynb rename benchmarks/threading_example.ipynb => examples/threading.ipynb (100%) diff --git a/Cargo.toml b/Cargo.toml index 3a8e7f9..8d33e11 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fast-stats" -version = "1.0.2" +version = "1.1.0" edition = "2021" [lib] diff --git a/README.md b/README.md index 7ab19b6..f54994a 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![License](https://img.shields.io/badge/license-Apache2.0-green)](./LICENSE) # fast-stats -`fast-stats` is a fast and simple library for calculating basic statistics such as: precision, recall, and f1-score. The library also supports the calculation of confusion matrices. For examples, please look at the `benchmarks/` folder. +`fast-stats` is a fast and simple library for calculating basic statistics such as: precision, recall, and f1-score. The library also supports the calculation of confusion matrices. For examples, please look at the `examples/` folder. The project was developed using the [maturin](https://maturin.rs) framework. 
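[Reviewer note, not part of the patch] As quick orientation for the API the README hunk above refers to, here is a minimal sketch of the binary helpers this PR adds. It mirrors the calls made in the new `examples/stats.ipynb` below; the array shape is arbitrary and chosen only for illustration.

```python
# Illustrative only; mirrors the usage shown in examples/stats.ipynb added by this PR.
import numpy as np
import fast_stats

# Inputs must be numpy arrays of the same shape with bool or int dtype,
# per the docstrings in fast_stats/binary.py.
y_true = np.random.randint(0, 2, (10, 512, 512)).astype(bool)
y_pred = np.random.randint(0, 2, (10, 512, 512)).astype(bool)

# Raw counts of true positives, false positives, and false negatives.
tp, fp, fn = fast_stats.binary_tp_fp_fn(y_true, y_pred)

# Dictionary with "precision", "recall", and "f1-score" keys.
stats = fast_stats.binary_stats(y_true, y_pred)
```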
diff --git a/benchmarks/timeit.ipynb b/examples/benchmarks.ipynb similarity index 100% rename from benchmarks/timeit.ipynb rename to examples/benchmarks.ipynb diff --git a/examples/stats.ipynb b/examples/stats.ipynb new file mode 100644 index 0000000..3f0ea7b --- /dev/null +++ b/examples/stats.ipynb @@ -0,0 +1,144 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import fast_stats" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Settings" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "SIZE = (10, 512, 512)\n", + "NUM_CATS = 8" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Binary Statistics" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "y_true = np.random.randint(0, 2, SIZE).astype(bool)\n", + "y_pred = np.random.randint(0, 2, SIZE).astype(bool)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'precision': 0.49939381724124243,\n", + " 'recall': 0.4994250828781588,\n", + " 'f1-score': 0.4994094495703526}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fast_stats.binary_stats(y_true, y_pred)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multi-class Statistics" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "y_true = np.random.randint(0, NUM_CATS, SIZE)\n", + "y_pred = np.random.randint(0, NUM_CATS, SIZE)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'precision': array([0.1256168 , 0.12500038, 0.12486642, 0.1248673 , 0.12500914,\n", + " 0.12636344, 0.12454387, 0.12488666]),\n", + " 'recall': array([0.12568051, 0.12524121, 0.1245743 , 0.12500458, 0.12535152,\n", + " 0.12616009, 0.12444887, 0.12469365]),\n", + " 'f1-score': array([0.12564865, 0.12512068, 0.12472019, 0.1249359 , 0.1251801 ,\n", + " 0.12626169, 0.12449635, 0.12479008])}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fast_stats.stats(y_true, y_pred)" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "a3a671d63c09fb4878d313d605bf6366336b9695c04e11736a5d015abf9b1e42" + }, + "kernelspec": { + "display_name": "Python 3.9.11 ('.venv39': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.11" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/benchmarks/threading_example.ipynb b/examples/threading.ipynb similarity index 100% rename from benchmarks/threading_example.ipynb rename to examples/threading.ipynb diff --git a/fast_stats/__init__.py b/fast_stats/__init__.py index 3c6e8be..87f86b2 100644 --- a/fast_stats/__init__.py +++ b/fast_stats/__init__.py @@ -1,3 +1,9 @@ -from .binary import binary_f1_score, binary_precision, binary_recall +from .binary import ( + binary_f1_score, + 
binary_precision, + binary_recall, + binary_stats, + binary_tp_fp_fn, +) from .confusion_matrix import confusion_matrix -from .multiclass import f1_score, precision, recall +from .multiclass import f1_score, precision, recall, stats diff --git a/fast_stats/binary.py b/fast_stats/binary.py index cf3eecf..9b79e90 100644 --- a/fast_stats/binary.py +++ b/fast_stats/binary.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import Union +from typing import Dict, Tuple, Union import numpy as np @@ -128,3 +128,74 @@ def binary_f1_score( return 0.0 return 2 * p * r / (p + r) + + +def binary_tp_fp_fn( + y_true: np.ndarray, + y_pred: np.ndarray, +) -> Tuple[float]: + """Binary calculations for TP, FP, and FN + + Args: + y_true (np.ndarray): array of true values (must be bool or int types) + y_pred (np.ndarray): array of pred values (must be bool or int types) + Returns: + Tuple[int]: counts for TP, FP, and FN + """ + assert y_true.shape == y_pred.shape, "y_true and y_pred must be same shape" + assert all( + [ + isinstance(y_pred, np.ndarray), + isinstance(y_true, np.ndarray), + ] + ), "y_true and y_pred must be numpy arrays" + + tp, tp_fp, tp_fn = _binary_f1_score_reqs(y_true, y_pred) + fp, fn = tp_fp - tp, tp_fn - tp + return tp, fp, fn + + +def binary_stats( + y_true: np.ndarray, + y_pred: np.ndarray, + zero_division: ZeroDivision = ZeroDivision.NONE, +) -> Dict[str, Result]: + """Binary calculations for precision, recall and f1-score + + Args: + y_true (np.ndarray): array of true values (must be bool or int types) + y_pred (np.ndarray): array of pred values (must be bool or int types) + Returns: + Dict[str, Result]: stats for precision, recall and f1-score + """ + assert y_true.shape == y_pred.shape, "y_true and y_pred must be same shape" + assert all( + [ + isinstance(y_pred, np.ndarray), + isinstance(y_true, np.ndarray), + ] + ), "y_true and y_pred must be numpy arrays" + zero_division = ZeroDivision(zero_division) + + tp, tp_fp, tp_fn = _binary_f1_score_reqs(y_true, y_pred) + p, r = _precision(tp, tp_fp, zero_division), _recall(tp, tp_fn, zero_division) + stats = dict({"precision": p, "recall": r}) + + # convert p and/or r to 0 if None + if p is None: + p = 0.0 + if r is None: + r = 0.0 + + # handle 0 cases + if p + r == 0: + if zero_division == ZeroDivision.NONE: + f1 = None + elif zero_division == ZeroDivision.ZERO: + f1 = 0.0 + else: + f1 = 2 * p * r / (p + r) + + stats.update({"f1-score": f1}) + + return stats diff --git a/fast_stats/multiclass.py b/fast_stats/multiclass.py index 8984831..122d5f3 100644 --- a/fast_stats/multiclass.py +++ b/fast_stats/multiclass.py @@ -1,6 +1,6 @@ from enum import Enum from functools import partial -from typing import List, Union +from typing import Dict, List, Union import numpy as np @@ -190,3 +190,97 @@ def f1_from_ext(x, y, z): return zero_handle(f1_from_ext(x[:, 0].sum(), x[:, 1].sum(), x[:, 2].sum())) elif average == AverageType.MACRO: return np.nanmean(f1_from_ext(x[:, 0], x[:, 1], x[:, 2])) + + +def stats( + y_true: np.ndarray, + y_pred: np.ndarray, + labels: Union[List, np.ndarray] = None, + zero_division: ZeroDivision = ZeroDivision.NONE, + average: AverageType = AverageType.NONE, +) -> Dict[str, Result]: + """Multi-class calculation of f1 score + + Args: + y_true (np.ndarray): array of true values (must be bool or int types) + y_pred (np.ndarray): array of pred values (must be bool or int types) + labels (optional | list or np.ndarray): + labels to calculate confusion matrix for (must be bool or int types) + zero_division (optional | str): 
strategy to handle division by 0 + average (optional | str): strategy for averaging across classes + Returns: + Dict[str, Result]: dictionary of strings to 1D array or scalar values + depending on averaging + """ + assert y_true.shape == y_pred.shape, "y_true and y_pred must be same shape" + assert all( + [ + isinstance(y_pred, np.ndarray), + isinstance(y_true, np.ndarray), + ] + ), "y_true and y_pred must be numpy arrays" + zero_division = ZeroDivision(zero_division) + average = AverageType(average) + + if labels is None: + labels = np.array( + sorted(list(_unique(np.concatenate([y_true, y_pred])))), dtype=y_true.dtype + ) + elif isinstance(labels, list): + labels = np.array(labels, dtype=y_true.dtype) + + x = _f1_score(y_true, y_pred, labels) + + if zero_division == ZeroDivision.NONE: + zero_handle = partial( + np.nan_to_num, copy=False, nan=np.nan, posinf=np.nan, neginf=np.nan + ) + elif zero_division == zero_division.ZERO: + zero_handle = partial( + np.nan_to_num, copy=False, nan=0.0, posinf=0.0, neginf=0.0 + ) + + def f1_from_ext(x, y, z): + p, r = x / y, x / z + return 2 * p * r / (p + r) + + stats = dict() + + # precision + with np.errstate(divide="ignore", invalid="ignore"): + if average == AverageType.NONE: + stats.update({"precision": zero_handle(x[:, 0] / x[:, 1])}) + elif average == AverageType.MICRO: + stats.update({"precision": zero_handle(x[:, 0].sum() / x[:, 1].sum())}) + elif average == AverageType.MACRO: + stats.update({"precision": np.nanmean(zero_handle(x[:, 0] / x[:, 1]))}) + + # recall + with np.errstate(divide="ignore", invalid="ignore"): + if average == AverageType.NONE: + stats.update({"recall": zero_handle(x[:, 0] / x[:, 2])}) + elif average == AverageType.MICRO: + stats.update({"recall": zero_handle(x[:, 0].sum() / x[:, 2].sum())}) + elif average == AverageType.MACRO: + stats.update({"recall": np.nanmean(zero_handle(x[:, 0] / x[:, 2]))}) + + # f1-score + with np.errstate(divide="ignore", invalid="ignore"): + if average == AverageType.NONE: + stats.update( + {"f1-score": zero_handle(f1_from_ext(x[:, 0], x[:, 1], x[:, 2]))} + ) + elif average == AverageType.MICRO: + stats.update( + { + "f1-score": zero_handle( + f1_from_ext(x[:, 0].sum(), x[:, 1].sum(), x[:, 2].sum()) + ) + } + ) + elif average == AverageType.MACRO: + stats.update( + {"f1-score": np.nanmean(f1_from_ext(x[:, 0], x[:, 1], x[:, 2]))} + ) + + return stats diff --git a/pyproject.toml b/pyproject.toml index 8755a95..4d95cb1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "fast-stats" -version = "1.0.2" +version = "1.1.0" description = "A fast and simple library for calculating basic statistics" readme = "README.md" license = {text="Apache 2.0"} @@ -29,6 +29,7 @@ repository = "https://github.com/zachcoleman/fast-stats" [project.optional-dependencies] test = [ + "dictdiffer", "pytest", "pytest-cov[all]" ] diff --git a/tests/test_binary.py b/tests/test_binary.py index 8523cf8..0633c5b 100644 --- a/tests/test_binary.py +++ b/tests/test_binary.py @@ -1,5 +1,6 @@ import numpy as np import pytest +from dictdiffer import diff import fast_stats @@ -148,3 +149,81 @@ def test_f1(y_true, y_pred, zero_division, expected): assert np.allclose( fast_stats.binary_f1_score(y_true, y_pred, zero_division), expected ) + + +@pytest.mark.parametrize( + "y_true,y_pred,expected", + [ + ( + np.ones(4, dtype=np.uint64), + np.zeros(4, dtype=np.uint64), + (0, 0, 4), + ), # all FN + (np.ones(4, dtype=np.uint64), np.ones(4, dtype=np.uint64), (4, 0, 0)), # all TP + ( + np.zeros(4, dtype=np.uint64), + 
np.zeros(4, dtype=np.uint64), + (0, 0, 0), + ), # No TP & No FP, & No FN + ( + np.ones(4, dtype=np.uint64), + np.array([1, 0, 0, 0], dtype=np.uint64), + (1, 0, 3), + ), # 1 TP & 3 FN + ( + np.array([1, 1, 0, 0], dtype=np.uint64), + np.array([0, 1, 1, 0], dtype=np.uint64), + (1, 1, 1), + ), # 1 TP, 1 FP, & 1 FN + ], +) +def test_tpfpfn(y_true, y_pred, expected): + assert np.allclose(fast_stats.binary_tp_fp_fn(y_true, y_pred), expected) + + +@pytest.mark.parametrize( + "y_true,y_pred,zero_division,expected", + [ + ( + np.ones(4, dtype=np.uint64), + np.zeros(4, dtype=np.uint64), + "zero", + {"precision": 0.0, "recall": 0.0, "f1-score": 0.0}, + ), # all FN + ( + np.ones(4, dtype=np.uint64), + np.ones(4, dtype=np.uint64), + "zero", + {"precision": 1.0, "recall": 1.0, "f1-score": 1.0}, + ), # all TP + ( + np.zeros(4, dtype=np.uint64), + np.zeros(4, dtype=np.uint64), + "none", + {"precision": None, "recall": None, "f1-score": None}, + ), # No TP & No FP, & No FN + ( + np.ones(4, dtype=np.uint64), + np.array([1, 0, 0, 0], dtype=np.uint64), + "none", + { + "precision": 1.0, + "recall": 0.25, + "f1-score": 2 * (1 / 4 * 1.0) / (1 / 4 + 1.0), + }, + ), # 1 TP & 3 FN + ( + np.array([1, 1, 0, 0], dtype=np.uint64), + np.array([0, 1, 1, 0], dtype=np.uint64), + "none", + {"precision": 0.5, "recall": 0.5, "f1-score": 0.5}, + ), + ], +) +def test_stats(y_true, y_pred, zero_division, expected): + assert ( + len( + list(diff(fast_stats.binary_stats(y_true, y_pred, zero_division), expected)) + ) + == 0 + ) diff --git a/tests/test_multiclass.py b/tests/test_multiclass.py index 2c69efc..df63efd 100644 --- a/tests/test_multiclass.py +++ b/tests/test_multiclass.py @@ -1,5 +1,6 @@ import numpy as np import pytest +from dictdiffer import diff import fast_stats @@ -419,3 +420,162 @@ def test_recall(y_true, y_pred, kwargs, expected): ) def test_f1_score(y_true, y_pred, kwargs, expected): assert np.allclose(fast_stats.f1_score(y_true, y_pred, **kwargs), expected) + + +@pytest.mark.parametrize( + "y_true,y_pred,kwargs,expected", + [ + ( + np.array( + [ + 1.0, + 2.0, + 3.0, + 1.0, + 2.0, + 3.0, + ], + dtype=np.uint64, + ), + np.array( + [ + 1.0, + 2.0, + 3.0, + 1.0, + 2.0, + 3.0, + ], + dtype=np.uint64, + ), + {}, + { + "precision": np.array([1.0, 1.0, 1.0]), + "recall": np.array([1.0, 1.0, 1.0]), + "f1-score": np.array([1.0, 1.0, 1.0]), + }, + ), # perfect + ( + np.array( + [ + 1.0, + 2.0, + 3.0, + 1.0, + 2.0, + 3.0, + ], + dtype=np.uint64, + ), + np.array( + [ + 1.0, + 2.0, + 3.0, + 2.0, + 3.0, + 1.0, + ], + dtype=np.uint64, + ), + {"labels": [1, 2, 3]}, + { + "precision": np.array([0.5, 0.5, 0.5]), + "recall": np.array([0.5, 0.5, 0.5]), + "f1-score": np.array([0.5, 0.5, 0.5]), + }, + ), # 50% + ( + np.array( + [ + 1.0, + 2.0, + 3.0, + 1.0, + 2.0, + 3.0, + ], + dtype=np.uint64, + ), + np.array( + [ + 1.0, + 2.0, + 3.0, + 2.0, + 1.0, + 3.0, + ], + dtype=np.uint64, + ), + {"labels": [1, 2]}, + { + "precision": np.array([0.5, 0.5]), + "recall": np.array([0.5, 0.5]), + "f1-score": np.array([0.5, 0.5]), + }, + ), # 50% subset + ( + np.array( + [ + 1.0, + 2.0, + 3.0, + 1.0, + 2.0, + 3.0, + ], + dtype=np.uint64, + ), + np.array( + [ + 1.0, + 2.0, + 3.0, + 2.0, + 3.0, + 1.0, + ], + dtype=np.uint64, + ), + {"average": "micro", "zero_division": "none"}, + { + "precision": 0.5, + "recall": 0.5, + "f1-score": 0.5, + }, + ), # 50% micro + ( + np.array( + [ + 1.0, + 2.0, + 3.0, + 1.0, + 2.0, + 3.0, + ], + dtype=np.uint64, + ), + np.array( + [ + 1.0, + 2.0, + 3.0, + 2.0, + 3.0, + 1.0, + ], + dtype=np.uint64, + ), + {"average": 
"macro", "zero_division": "zero"}, + { + "precision": 0.5, + "recall": 0.5, + "f1-score": 0.5, + }, + ), # 50% macro + ], +) +def test_stats(y_true, y_pred, kwargs, expected): + assert len(list(diff(fast_stats.stats(y_true, y_pred, **kwargs), expected))) == 0