From baffa9f75689ae944c618f70c943c91b63210482 Mon Sep 17 00:00:00 2001 From: tess <48131946+stress-tess@users.noreply.github.com> Date: Thu, 8 Aug 2024 10:05:17 -0400 Subject: [PATCH] Closes #3372: Add logistic to random number generators (#3605) * Closes #3372: Add logistic to random number generators This PR (closes #3372) adds logistic to generators. This doesn't add multi-dim bc it uses `uniformStreamPerElem` * added test, docs, and helper --------- Co-authored-by: Tess Hayes --- PROTO_tests/tests/random_test.py | 42 +++++++++++ arkouda/random/_generator.py | 122 ++++++++++++++++++++++++------- pydoc/usage/random.rst | 4 + src/RandMsg.chpl | 35 ++++++++- src/RandUtil.chpl | 15 +++- src/registry/Commands.chpl | 24 +++--- 6 files changed, 202 insertions(+), 40 deletions(-) diff --git a/PROTO_tests/tests/random_test.py b/PROTO_tests/tests/random_test.py index 3eb8ff291a..7d6f8ac13e 100644 --- a/PROTO_tests/tests/random_test.py +++ b/PROTO_tests/tests/random_test.py @@ -1,5 +1,6 @@ import os from collections import Counter +from itertools import product import numpy as np import pytest @@ -194,6 +195,30 @@ def test_choice_flags(self): current = rng.choice(a, size, replace, p) assert np.allclose(previous.to_list(), current.to_list()) + def test_logistic(self): + scal = 2 + arr = ak.arange(5) + + for loc, scale in product([scal, arr], [scal, arr]): + rng = ak.random.default_rng(17) + num_samples = 5 + log_sample = rng.logistic(loc=loc, scale=scale, size=num_samples).to_list() + + rng = ak.random.default_rng(17) + assert rng.logistic(loc=loc, scale=scale, size=num_samples).to_list() == log_sample + + def test_lognormal(self): + scal = 2 + arr = ak.arange(5) + + for mean, sigma in product([scal, arr], [scal, arr]): + rng = ak.random.default_rng(17) + num_samples = 5 + log_sample = rng.lognormal(mean=mean, sigma=sigma, size=num_samples).to_list() + + rng = ak.random.default_rng(17) + assert rng.lognormal(mean=mean, sigma=sigma, size=num_samples).to_list() == log_sample + def test_normal(self): rng = ak.random.default_rng(17) both_scalar = rng.normal(loc=10, scale=2, size=10).to_list() @@ -394,6 +419,23 @@ def test_exponential_hypothesis_testing(self, method): ) assert ks_res.pvalue > 0.05 + def test_logistic_hypothesis_testing(self): + # I tested this many times without a set seed, but with no seed + # it's expected to fail one out of every ~20 runs given a pval limit of 0.05. + rng = np.random.default_rng(34) + num_samples = 10**4 + mu = rng.uniform(0, 10) + scale = rng.uniform(0, 10) + + sample = rng.logistic(loc=mu, scale=scale, size=num_samples) + sample_list = sample.tolist() + + # second goodness of fit test against the distribution with proper mean and std + good_fit_res = sp_stats.goodness_of_fit( + sp_stats.logistic, sample_list, known_params={"loc": mu, "scale": scale} + ) + assert good_fit_res.pvalue > 0.05 + def test_legacy_randint(self): testArray = ak.random.randint(0, 10, 5) assert isinstance(testArray, ak.pdarray) diff --git a/arkouda/random/_generator.py b/arkouda/random/_generator.py index 2315eaf642..3f44cfd7a6 100644 --- a/arkouda/random/_generator.py +++ b/arkouda/random/_generator.py @@ -167,14 +167,9 @@ def exponential(self, scale=1.0, size=None, method="zig"): pdarray Drawn samples from the parameterized exponential distribution. """ - if isinstance(scale, pdarray): - if (scale < 0).any(): - raise ValueError("scale cannot be less then 0") - elif _val_isinstance_of_union(scale, numeric_scalars): - if scale < 0: - raise ValueError("scale cannot be less then 0") - else: - raise TypeError("scale must be either a float scalar or pdarray") + _, scale = float_array_or_scalar_helper("exponential", "scale", scale, size) + if (scale < 0).any() if isinstance(scale, pdarray) else scale < 0: + raise TypeError("scale must be non-negative.") return scale * self.standard_exponential(size, method=method) def standard_exponential(self, size=None, method="zig"): @@ -297,6 +292,78 @@ def integers(self, low, high=None, size=None, dtype=akint64, endpoint=False): self._state += full_size return create_pdarray(rep_msg) + def logistic(self, loc=0.0, scale=1.0, size=None): + r""" + Draw samples from a logistic distribution. + + Samples are drawn from a logistic distribution with specified parameters, + loc (location or mean, also median), and scale (>0). + + Parameters + ---------- + loc: float or pdarray of floats, optional + Parameter of the distribution. Default of 0. + + scale: float or pdarray of floats, optional + Parameter of the distribution. Must be non-negative. Default is 1. + + size: numeric_scalars, optional + Output shape. Default is None, in which case a single value is returned. + + Notes + ----- + The probability density for the Logistic distribution is + + .. math:: + P(x) = \frac{e^{-(x - \mu)/s}}{s( 1 + e^{-(x - \mu)/s})^2} + + where :math:`\mu` is the location and :math:`s` is the scale. + + The Logistic distribution is used in Extreme Value problems where it can act + as a mixture of Gumbel distributions, in Epidemiology, and by the World Chess Federation (FIDE) + where it is used in the Elo ranking system, assuming the performance of each player + is a logistically distributed random variable. + + Returns + ------- + pdarray + Pdarray of floats (unless size=None, in which case a single float is returned). + + See Also + -------- + normal + + Examples + -------- + >>> ak.random.default_rng(17).logistic(3, 2.5, 3) + array([1.1319566682702642 -7.1665150633720014 7.7208667145173608]) + """ + if size is None: + # delegate to numpy when return size is 1 + return self._np_generator.logistic(loc=loc, scale=scale, size=size) + + is_single_mu, mu = float_array_or_scalar_helper("logistic", "loc", loc, size) + is_single_scale, scale = float_array_or_scalar_helper("logistic", "scale", scale, size) + if (scale < 0).any() if isinstance(scale, pdarray) else scale < 0: + raise TypeError("scale must be non-negative.") + + rep_msg = generic_msg( + cmd="logisticGenerator", + args={ + "name": self._name_dict[akdtype("float64")], + "mu": mu, + "is_single_mu": is_single_mu, + "scale": scale, + "is_single_scale": is_single_scale, + "size": size, + "has_seed": self._seed is not None, + "state": self._state, + }, + ) + # we only generate one val using the generator in the symbol table + self._state += 1 + return create_pdarray(rep_msg) + def lognormal(self, mean=0.0, sigma=1.0, size=None, method="zig"): r""" Draw samples from a log-normal distribution with specified mean, @@ -622,24 +689,9 @@ def poisson(self, lam=1.0, size=None): # delegate to numpy when return size is 1 return self._np_generator.poisson(lam, size) - if _val_isinstance_of_union(lam, numeric_scalars): - is_single_lambda = True - if not _val_isinstance_of_union(lam, float_scalars): - lam = float(lam) - if lam < 0: - raise TypeError("lambda must be >=0") - elif isinstance(lam, pdarray): - is_single_lambda = False - if size != lam.size: - raise TypeError("array of lambdas must have same size as return size") - if lam.dtype != akfloat64: - from arkouda.numeric import cast as akcast - - lam = akcast(lam, akfloat64) - if (lam < 0).any(): - raise TypeError("all lambdas must be >=0") - else: - raise TypeError("poisson only accepts a pdarray or float scalar for lam") + is_single_lambda, lam = float_array_or_scalar_helper("poisson", "lam", lam, size) + if (lam < 0).any() if isinstance(lam, pdarray) else lam < 0: + raise TypeError("lam must be non-negative.") rep_msg = generic_msg( cmd="poissonGenerator", @@ -758,3 +810,21 @@ def default_rng(seed=None): ).split()[1] return Generator(name_dict, seed if has_seed else None, state=state) + + +def float_array_or_scalar_helper(func_name, var_name, var, size): + if _val_isinstance_of_union(var, numeric_scalars): + is_scalar = True + if not _val_isinstance_of_union(var, float_scalars): + var = float(var) + elif isinstance(var, pdarray): + is_scalar = False + if size != var.size: + raise TypeError(f"array of {var_name} must have same size as return size") + if var.dtype != akfloat64: + from arkouda.numeric import cast as akcast + + var = akcast(var, akfloat64) + else: + raise TypeError(f"{func_name} only accepts a pdarray or float scalar for {var_name}") + return is_scalar, var diff --git a/pydoc/usage/random.rst b/pydoc/usage/random.rst index a43b6f27a6..85b66acd00 100644 --- a/pydoc/usage/random.rst +++ b/pydoc/usage/random.rst @@ -29,6 +29,10 @@ integers --------- .. autofunction:: arkouda.random.Generator.integers +logistic +--------- +.. autofunction:: arkouda.random.Generator.logistic + lognormal --------- .. autofunction:: arkouda.random.Generator.lognormal diff --git a/src/RandMsg.chpl b/src/RandMsg.chpl index aec1fe2941..6837302676 100644 --- a/src/RandMsg.chpl +++ b/src/RandMsg.chpl @@ -573,6 +573,38 @@ module RandMsg return MsgTuple.error("choice does not support the bigint dtype"); } + inline proc logisticGenerator(mu: real, scale: real, ref rs) { + var U = rs.next(0, 1); + + while U <= 0.0 { + /* Reject U == 0.0 and call again to get next value */ + U = rs.next(0, 1); + } + return mu + scale * log(U / (1.0 - U)); + } + + proc logisticGeneratorMsg(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws { + const name = msgArgs["name"], + isSingleMu = msgArgs["is_single_mu"].toScalar(bool), + muStr = msgArgs["mu"].toScalar(string), + isSingleScale = msgArgs["is_single_scale"].toScalar(bool), + scaleStr = msgArgs["scale"].toScalar(string), + size = msgArgs["size"].toScalar(int), + hasSeed = msgArgs["has_seed"].toScalar(bool), + state = msgArgs["state"].toScalar(int); + + var generatorEntry = st[name]: borrowed GeneratorSymEntry(real); + ref rng = generatorEntry.generator; + if state != 1 then rng.skipTo(state-1); + + var logisticArr = makeDistArray(size, real); + const mu = new scalarOrArray(muStr, !isSingleMu, st), + scale = new scalarOrArray(scaleStr, !isSingleScale, st); + + uniformStreamPerElem(logisticArr, rng, GenerationFunction.LogisticGenerator, hasSeed, mu=mu, scale=scale); + return st.insert(createSymEntry(logisticArr)); + } + @arkouda.instantiateAndRegister proc permutation(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab, type array_dtype, param array_nd: int): MsgTuple throws { const name = msgArgs["name"], @@ -656,7 +688,7 @@ module RandMsg var poissonArr = makeDistArray(size, int); const lam = new scalarOrArray(lamStr, !isSingleLam, st); - uniformStreamPerElem(poissonArr, rng, GenerationFunction.PoissonGenerator, hasSeed, lam); + uniformStreamPerElem(poissonArr, rng, GenerationFunction.PoissonGenerator, hasSeed, lam=lam); return st.insert(createSymEntry(poissonArr)); } @@ -717,6 +749,7 @@ module RandMsg } use CommandMap; + registerFunction("logisticGenerator", logisticGeneratorMsg, getModuleName()); registerFunction("segmentedSample", segmentedSampleMsg, getModuleName()); registerFunction("poissonGenerator", poissonGeneratorMsg, getModuleName()); } diff --git a/src/RandUtil.chpl b/src/RandUtil.chpl index 0627b76383..886aea4459 100644 --- a/src/RandUtil.chpl +++ b/src/RandUtil.chpl @@ -37,6 +37,7 @@ module RandUtil { enum GenerationFunction { ExponentialGenerator, + LogisticGenerator, NormalGenerator, PoissonGenerator, } @@ -44,7 +45,10 @@ module RandUtil { // TODO how to update this to handle randArr being a multi-dim array?? // I thought to just do the same randArr[randArr.domain.orderToIndex(i)] trick // but im not sure how randArr.localSubdomain() will differ with multi-dim - proc uniformStreamPerElem(ref randArr: [?D] ?t, ref rng, param function: GenerationFunction, hasSeed: bool, const lam: scalarOrArray(?) = new scalarOrArray()) throws { + proc uniformStreamPerElem(ref randArr: [?D] ?t, ref rng, param function: GenerationFunction, hasSeed: bool, + const lam: scalarOrArray(?) = new scalarOrArray(), + const mu: scalarOrArray(?) = new scalarOrArray(), + const scale: scalarOrArray(?) = new scalarOrArray()) throws { if hasSeed { // use a fixed number of elements per stream instead of relying on number of locales or numTasksPerLoc because these // can vary from run to run / machine to mahchine. And it's important for the same seed to give the same results @@ -79,6 +83,9 @@ module RandUtil { when GenerationFunction.ExponentialGenerator { agg.copy(randArr[i], standardExponentialZig(realRS, uintRS)); } + when GenerationFunction.LogisticGenerator { + agg.copy(randArr[i], logisticGenerator(mu[i], scale[i], realRS)); + } when GenerationFunction.NormalGenerator { agg.copy(randArr[i], standardNormZig(realRS, uintRS)); } @@ -101,6 +108,9 @@ module RandUtil { when GenerationFunction.ExponentialGenerator { randArr[i] = standardExponentialZig(realRS, uintRS); } + when GenerationFunction.LogisticGenerator { + randArr[i] = logisticGenerator(mu[i], scale[i], realRS); + } when GenerationFunction.NormalGenerator { randArr[i] = standardNormZig(realRS, uintRS); } @@ -124,6 +134,9 @@ module RandUtil { when GenerationFunction.ExponentialGenerator { rv = standardExponentialZig(realRS, uintRS); } + when GenerationFunction.LogisticGenerator { + rv = logisticGenerator(mu[i], scale[i], realRS); + } when GenerationFunction.NormalGenerator { rv = standardNormZig(realRS, uintRS); } diff --git a/src/registry/Commands.chpl b/src/registry/Commands.chpl index c7af1a60ce..7aabaf715a 100644 --- a/src/registry/Commands.chpl +++ b/src/registry/Commands.chpl @@ -1341,51 +1341,51 @@ registerFunction('choice', ark_choice_bigint, 'RandMsg', 503); proc ark_permutation_int_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.permutation(cmd, msgArgs, st, array_dtype=int, array_nd=1); -registerFunction('permutation', ark_permutation_int_1, 'RandMsg', 577); +registerFunction('permutation', ark_permutation_int_1, 'RandMsg', 609); proc ark_permutation_uint_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.permutation(cmd, msgArgs, st, array_dtype=uint, array_nd=1); -registerFunction('permutation', ark_permutation_uint_1, 'RandMsg', 577); +registerFunction('permutation', ark_permutation_uint_1, 'RandMsg', 609); proc ark_permutation_uint8_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.permutation(cmd, msgArgs, st, array_dtype=uint(8), array_nd=1); -registerFunction('permutation', ark_permutation_uint8_1, 'RandMsg', 577); +registerFunction('permutation', ark_permutation_uint8_1, 'RandMsg', 609); proc ark_permutation_real_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.permutation(cmd, msgArgs, st, array_dtype=real, array_nd=1); -registerFunction('permutation', ark_permutation_real_1, 'RandMsg', 577); +registerFunction('permutation', ark_permutation_real_1, 'RandMsg', 609); proc ark_permutation_bool_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.permutation(cmd, msgArgs, st, array_dtype=bool, array_nd=1); -registerFunction('permutation', ark_permutation_bool_1, 'RandMsg', 577); +registerFunction('permutation', ark_permutation_bool_1, 'RandMsg', 609); proc ark_permutation_bigint_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.permutation(cmd, msgArgs, st, array_dtype=bigint, array_nd=1); -registerFunction('permutation', ark_permutation_bigint_1, 'RandMsg', 577); +registerFunction('permutation', ark_permutation_bigint_1, 'RandMsg', 609); proc ark_shuffle_int_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.shuffle(cmd, msgArgs, st, array_dtype=int, array_nd=1); -registerFunction('shuffle', ark_shuffle_int_1, 'RandMsg', 664); +registerFunction('shuffle', ark_shuffle_int_1, 'RandMsg', 696); proc ark_shuffle_uint_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.shuffle(cmd, msgArgs, st, array_dtype=uint, array_nd=1); -registerFunction('shuffle', ark_shuffle_uint_1, 'RandMsg', 664); +registerFunction('shuffle', ark_shuffle_uint_1, 'RandMsg', 696); proc ark_shuffle_uint8_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.shuffle(cmd, msgArgs, st, array_dtype=uint(8), array_nd=1); -registerFunction('shuffle', ark_shuffle_uint8_1, 'RandMsg', 664); +registerFunction('shuffle', ark_shuffle_uint8_1, 'RandMsg', 696); proc ark_shuffle_real_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.shuffle(cmd, msgArgs, st, array_dtype=real, array_nd=1); -registerFunction('shuffle', ark_shuffle_real_1, 'RandMsg', 664); +registerFunction('shuffle', ark_shuffle_real_1, 'RandMsg', 696); proc ark_shuffle_bool_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.shuffle(cmd, msgArgs, st, array_dtype=bool, array_nd=1); -registerFunction('shuffle', ark_shuffle_bool_1, 'RandMsg', 664); +registerFunction('shuffle', ark_shuffle_bool_1, 'RandMsg', 696); proc ark_shuffle_bigint_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.shuffle(cmd, msgArgs, st, array_dtype=bigint, array_nd=1); -registerFunction('shuffle', ark_shuffle_bigint_1, 'RandMsg', 664); +registerFunction('shuffle', ark_shuffle_bigint_1, 'RandMsg', 696); import StatsMsg;