closes #2927 power divergence statistic (#2932)

* closes #2927 power divergence statistic
* add scipy to requirements
* add arkouda/akstats/_stats_py.pyi
* Fix F403 and F401 error codes on flake8 arkouda from arkouda/akmath/__init__.py and arkouda/akstats/__init__.py
* un-pin scipy from specific version
* add scipy license and minor changes in response to code review
* Update tests/akmath/akmath_test.py

---------

Co-authored-by: Amanda Potts <ajpotts@users.noreply.github.com>
Co-authored-by: pierce <48131946+pierce314159@users.noreply.github.com>
Bears-R-Us · Feb 2, 2024 · 3613d76 · 3613d76
1 parent ab0bb3f
commit 3613d76
Show file tree

Hide file tree

Showing 17 changed files with 529 additions and 4 deletions.
diff --git a/PROTO_tests/tests/akmath/akmath_tests.py b/PROTO_tests/tests/akmath/akmath_tests.py
@@ -0,0 +1,28 @@
+import math
+
+import numpy as np
+
+import arkouda as ak
+from arkouda.akmath import xlogy
+from arkouda.pdarrayclass import pdarray
+
+
+class TestStats:
+    """Checks arkouda.akmath.xlogy against scipy.special.xlogy."""
+
+    def test_xlogy(self):
+        """Compare both implementations over every (x, y) pairing listed below."""
+        from scipy.special import xlogy as scipy_xlogy
+
+        # y includes -1, 0 and NaN; x covers Python ints, a NumPy float64 and a pdarray.
+        log_args = [ak.array([1, 2, 3]), ak.array([10, 100, 100]), ak.array([-1, 0, np.nan])]
+        multipliers = [3, 5, np.float64(6), ak.array([1.0, 2.0, 4.5])]
+
+        for log_arg in log_args:
+            for mult in multipliers:
+                actual = xlogy(mult, log_arg).to_ndarray()
+
+                # SciPy is given NumPy inputs, so pdarrays are converted with to_ndarray().
+                np_mult = mult.to_ndarray() if isinstance(mult, pdarray) else mult
+                expected = scipy_xlogy(np_mult, log_arg.to_ndarray())
+                assert np.allclose(actual, expected, equal_nan=True)
diff --git a/PROTO_tests/tests/akstats/akstats_test.py b/PROTO_tests/tests/akstats/akstats_test.py
@@ -0,0 +1,73 @@
+import math
+
+import numpy as np
+from scipy.stats import power_divergence as scipy_power_divergence
+
+import arkouda as ak
+from arkouda.akstats import power_divergence as ak_power_divergence
+
+
+class TestStats:
+    """Compares arkouda.akstats results with their scipy.stats counterparts."""
+
+    @staticmethod
+    def create_stat_test_pairs():
+        """
+        Build the (f_obs, f_exp) inputs shared by the tests in this class.
+
+        Returns
+        -------
+        list of tuple
+            Three pairs built with ak.array; f_exp is None in the second pair.
+        """
+        counts = [10000000, 20000000, 30000000, 40000000, 50000000, 60000000, 70000000]
+        # Same as counts except for one entry that is larger by one.
+        nudged = [10000000, 20000000, 30000000, 40000001, 50000000, 60000000, 70000000]
+
+        return [
+            (ak.array(counts), ak.array(nudged)),
+            (ak.array(counts), None),
+            (ak.array([44, 24, 29, 3]) / 100 * 189, ak.array([43, 52, 54, 40])),
+        ]
+
+    def test_power_divergence(self):
+        """Check power_divergence against SciPy for every lambda_ name and ddof 0-5."""
+        stat_names = (
+            "pearson",
+            "log-likelihood",
+            "freeman-tukey",
+            "mod-log-likelihood",
+            "neyman",
+            "cressie-read",
+        )
+
+        for f_obs, f_exp in self.create_stat_test_pairs():
+            for stat_name in stat_names:
+                for ddof in range(6):
+                    actual = ak_power_divergence(f_obs, f_exp, ddof=ddof, lambda_=stat_name)
+
+                    # SciPy is given NumPy copies; a missing f_exp stays None.
+                    expected = scipy_power_divergence(
+                        f_obs.to_ndarray(),
+                        None if f_exp is None else f_exp.to_ndarray(),
+                        ddof=ddof,
+                        axis=0,
+                        lambda_=stat_name,
+                    )
+                    assert np.allclose(actual, expected, equal_nan=True)
+
+    def test_chisquare(self):
+        """Check chisquare against scipy.stats.chisquare for ddof 0-5."""
+        from scipy.stats import chisquare as scipy_chisquare
+
+        from arkouda.akstats import chisquare as ak_chisquare
+
+        for f_obs, f_exp in self.create_stat_test_pairs():
+            for ddof in range(6):
+                actual = ak_chisquare(f_obs, f_exp, ddof=ddof)
+
+                # SciPy is given NumPy copies; a missing f_exp stays None.
+                observed = f_obs.to_ndarray()
+                reference = None if f_exp is None else f_exp.to_ndarray()
+                expected = scipy_chisquare(observed, reference, ddof=ddof, axis=0)
+                assert np.allclose(actual, expected, equal_nan=True)
diff --git a/arkouda-env-dev.yml b/arkouda-env-dev.yml
@@ -20,6 +20,7 @@ dependencies:
   - libiconv
   - libidn2
   - jupyter
+  - scipy
 
   # Developer dependencies
   - pexpect
@@ -42,4 +43,4 @@ dependencies:
     - furo # sphinx theme
     - myst-parser
     - linkify-it-py
-
+
diff --git a/arkouda-env.yml b/arkouda-env.yml
@@ -20,6 +20,7 @@ dependencies:
   - libiconv
   - libidn2
   - jupyter
-
+  - scipy
+
   - pip:
-      - typeguard==2.10.0
+      - typeguard==2.10.0
diff --git a/arkouda/__init__.py b/arkouda/__init__.py
@@ -39,3 +39,5 @@
     is_registered,
     broadcast_dims,
 )
+from arkouda.akmath import *
+from arkouda.akstats import *
diff --git a/arkouda/akmath/__init__.py b/arkouda/akmath/__init__.py
@@ -0,0 +1,5 @@
+from ._math import xlogy
+
+__all__ = [  # names exported by "from arkouda.akmath import *"
+    "xlogy",
+]
diff --git a/arkouda/akmath/_math.py b/arkouda/akmath/_math.py
@@ -0,0 +1,52 @@
+from typing import Union
+from warnings import warn
+
+import numpy as np
+
+from arkouda.numeric import log
+from arkouda.pdarrayclass import pdarray
+
+
+def xlogy(x: Union[pdarray, np.float64], y: pdarray):
+    """
+    Computes x * log(y); no special case is applied where x == 0.
+
+    Parameters
+    ----------
+    x : pdarray or np.float64
+        Multiplier. Python bool/int/float values and NumPy scalars that cast
+        safely to float64 are converted to np.float64 first.
+    y : pdarray
+        Argument of the logarithm (arkouda.numeric.log).
+
+    Returns
+    -------
+    arkouda.pdarrayclass.pdarray or None
+        x * log(y). None, after a UserWarning, when the argument types are
+        unsupported or when x and y are pdarrays of different sizes.
+
+    Examples
+    --------
+    >>> import arkouda as ak
+    >>> ak.connect()
+    >>> from arkouda.akmath import xlogy
+    >>> xlogy( ak.array([1, 2, 3, 4]),  ak.array([5,6,7,8]))
+    array([1.6094379124341003 3.5835189384561099 5.8377304471659395 8.317766166719343])
+    >>> xlogy( 5.0, ak.array([1, 2, 3, 4]))
+    array([0.00000000000000000 3.4657359027997265 5.4930614433405491 6.9314718055994531])
+    """
+    # np.can_cast rejects Python scalars on NumPy >= 2.0, so test those by type.
+    if isinstance(x, (bool, int, float)) or (
+        isinstance(x, np.generic) and np.can_cast(x.dtype, np.float64)
+    ):
+        x = np.float64(x)
+
+    # Unsupported inputs produce a warning and a None result, not an exception.
+    if not isinstance(y, pdarray) or not isinstance(x, (np.float64, pdarray)):
+        msg = "x and y must both be pdarrays or x must be castable to float64 and y must be a pdarray."
+    elif isinstance(x, pdarray) and x.size != y.size:
+        msg = "x and y must have the same size."
+    else:
+        return x * log(y)
+    warn(msg, UserWarning)
+    return None
diff --git a/arkouda/akstats/LICENSE.txt b/arkouda/akstats/LICENSE.txt
@@ -0,0 +1,30 @@
+Copyright (c) 2001-2002 Enthought, Inc. 2003-2024, SciPy Developers.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following
+   disclaimer in the documentation and/or other materials provided
+   with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived
+   from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/arkouda/akstats/__init__.py b/arkouda/akstats/__init__.py
@@ -0,0 +1,3 @@
+from ._stats_py import Power_divergenceResult, chisquare, power_divergence
+
+__all__ = ["power_divergence", "chisquare", "Power_divergenceResult"]  # exported by "import *"