Skip to content

Commit

Permalink
Final touches
Browse files Browse the repository at this point in the history
  • Loading branch information
giacomov committed Jan 23, 2018
1 parent ba18e22 commit 39cda8f
Show file tree
Hide file tree
Showing 9 changed files with 251 additions and 28 deletions.
24 changes: 21 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ or directly the `setup.py` file:
# Examples
See Vianello (2018) for details about these methods.

NOTE: all functions accept either single values or arrays as inputs, even
though the following examples deal with single values only.

## Ideal case
Compute the significance of a measurement of 100 counts with an expected
background of 90 in the ideal case of no uncertainty on the background:
Expand All @@ -32,7 +35,16 @@ background of 90 in the ideal case of no uncertainty on the background:
from gv_significance import ideal_case

ideal_case.significance(n=100, b=90)
```

Compute the the counts that a source must generate in order to be detected
at 5 sigma with an efficiency of 90% given a background of 100 counts:

```python
from gv_significance import ideal_case

# Note: detection_efficiency must be either 0.5, 0.9 or 0.99
ideal_case.five_sigma_threshold(B=100, detection_efficiency=0.9)
```

## Poisson observation, Poisson background with or without systematics
Expand All @@ -45,7 +57,15 @@ on-source (`alpha=0.1`):
from gv_significance import poisson_poisson

poisson_poisson.significance(n=100, b=900, alpha=0.1)
```

Alternative, one can use the $Z_Bi$ estimator described in Cousins 2008 (but be
careful about the range of usability, see Vianello 2018) by doing:

```python
from gv_significance import poisson_poisson

poisson_poisson.z_bi_significance(n=100, b=900, alpha=0.1)
```

If there is an additional systematic uncertainty on the background measurement of
Expand All @@ -55,7 +75,6 @@ at most 10%, then:
from gv_significance import poisson_poisson

poisson_poisson.significance(n=100, b=900, alpha=0.1, k=0.1)

```

If there is an additional systematic uncertainty on the background measurement
Expand All @@ -65,7 +84,6 @@ that is Gaussian-distributed with a sigma of 10%, then:
from gv_significance import poisson_poisson

poisson_poisson.significance(n=100, b=900, alpha=0.1, sigma=0.1)

```

## Poisson observation, Gaussian background
Expand All @@ -76,5 +94,5 @@ background estimation procedure that gives us an expected background of
```python
from gv_significance import poisson_gaussian

poisson_gaussian.significance(n=100, b=90, sigma=0.4)
poisson_gaussian.significance(n=100, b=90, sigma=2.4)
```
37 changes: 35 additions & 2 deletions gv_significance/poisson_gaussian.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
from numpy import sqrt, squeeze
from xlogy import xlogyv
from z_bi import z_bi_vectorized
from size_one_or_n import size_one_or_n


def significance(n, b, sigma):
Expand All @@ -17,11 +19,13 @@ def significance(n, b, sigma):
n_ = np.array(n, dtype=float, ndmin=1)
b_ = np.array(b, dtype=float, ndmin=1)

sigma_ = size_one_or_n(sigma, n_, "sigma")

# Assign sign depending on whether n_ > b_

sign = np.where(n_ >= b_, 1, -1)

B0_mle = 0.5 * (b_ - sigma ** 2 + sqrt(b_ ** 2 - 2 * b_ * sigma ** 2 + 4 * n_ * sigma ** 2 + sigma ** 4))
B0_mle = 0.5 * (b_ - sigma_ ** 2 + sqrt(b_ ** 2 - 2 * b_ * sigma_ ** 2 + 4 * n_ * sigma_ ** 2 + sigma_ ** 4))

# B0_mle could be slightly < 0 (even though it shouldn't) because of the
# limited numerical precision of the calculator. let's accept as negative as 0.01, and clip
Expand All @@ -30,4 +34,33 @@ def significance(n, b, sigma):

B0_mle = np.clip(B0_mle, 0, None)

return squeeze(sqrt(2) * sqrt(xlogyv(n_, n_ / B0_mle) + (b_ - B0_mle)**2 / (2 * sigma**2) + B0_mle - n_) * sign)
return squeeze(sqrt(2) * sqrt(xlogyv(n_, n_ / B0_mle) + (b_ - B0_mle)**2 / (2 * sigma_**2) + B0_mle - n_) * sign)


def z_bi_significance(n, b, sigma):
"""
Use the estimator Z_Bi from Cousins et al. 2008 to compute the significance
:param n: observed counts (can be an array)
:param b: expected background counts (can be an array)
:param sigma: error on the measurement of b
:return: the significance (z score) for the measurement(s)
"""

n_ = np.array(n, dtype=float, ndmin=1)
b_ = np.array(b, dtype=float, ndmin=1)
sigma_ = size_one_or_n(sigma, n_, "sigma")

res = np.zeros(n_.shape[0], dtype=float)

idx = (sigma_ != 0)

# When sigma is zero there is no computation possible
res[~idx] = np.nan

tau = b_[idx] / sigma_[idx]**2 # Eq. 8 of Cousins et al. 2008
n_off = b_[idx] * tau

res[idx] = z_bi_vectorized(n_[idx], n_off, 1.0 / np.abs(tau))

return np.squeeze(res)
73 changes: 52 additions & 21 deletions gv_significance/poisson_poisson.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import numpy as np
from xlogy import xlogy
from xlogy import xlogy, xlogyv
import scipy.optimize
from z_bi import z_bi_vectorized
from size_one_or_n import size_one_or_n


def _li_and_ma(n_, b_, alpha):

# In order to avoid numerical problem with 0 * log(0), we add a tiny number to n_ and b_, inconsequential
# for the computation
n_ += 1e-15 # type: np.ndarray
b_ += 1e-15 # type: np.ndarray
n_ += 1E-25 # type: np.ndarray
b_ += 1E-25 # type: np.ndarray

# Pre-compute for speed
n_plus_b = n_ + b_
Expand All @@ -34,7 +36,7 @@ def _likelihood_with_sys(o, b, a, s, k, B, M):
Ba = B * a
Bak = B * a * k

res = -Bak - Ba - B - M + xlogy(b, B) - k ** 2 / (2 * s ** 2) + xlogy(o, Bak + Ba + M)
res = -Bak - Ba - B - M + xlogyv(b, B) - k ** 2 / (2 * s ** 2) + xlogyv(o, Bak + Ba + M)

return res

Expand Down Expand Up @@ -68,6 +70,9 @@ def _get_TS_by_numerical_optimization(n_, b_, alpha, sigma):
return TS


_get_TS_by_numerical_optimization_v = np.vectorize(_get_TS_by_numerical_optimization)


def significance(n, b, alpha, sigma=0, k=0):
"""
Returns the significance for detecting n counts when alpha * B are expected.
Expand All @@ -89,10 +94,10 @@ def significance(n, b, alpha, sigma=0, k=0):
:param n: observed counts (can be an array)
:param b: expected background counts (can be an array)
:param alpha: ratio of the source observation efficiency and background observation efficiency (must be the same for
all items in n and b)
:param sigma: standard deviation for the Gaussian case (must be the same for all items in n and b)
:param k: maximum fractional systematic uncertainty expected. (must be the same for all items in n and b)
:param alpha: ratio of the source observation efficiency and background observation efficiency
(either a float, or an array of the same shape of n)
:param sigma: standard deviation for the Gaussian case (either a float, or an array of the same shape of n)
:param k: maximum fractional systematic uncertainty expected (either a float, or an array of the same shape of n)
:return: the significance (z score) for the measurement(s)
"""

Expand All @@ -102,27 +107,53 @@ def significance(n, b, alpha, sigma=0, k=0):
n_ = np.array(n, dtype=float, ndmin=1)
b_ = np.array(b, dtype=float, ndmin=1)

k_ = size_one_or_n(k, n_, "k")

sigma_ = size_one_or_n(sigma, n_, "sigma")

alpha_ = size_one_or_n(alpha, n_, "alpha")

# Assign sign depending on whether n_ > b_

sign = np.where(n_ >= alpha * b_, 1, -1)
sign = np.where(n_ >= alpha_ * b_, 1, -1)

# Prepare vector for results
res = np.zeros(n_.shape[0], dtype=float)

# Select elements where we need to apply Li & Ma and apply it
idx_lima = (sigma_ == 0) & (k_ == 0)

res[idx_lima] = _li_and_ma(n_[idx_lima], b_[idx_lima], alpha_[idx_lima])

if sigma == 0 and k == 0:
# Select elements where we need to apply eq. 7 from Vianello (2018),
# which is simply Li & Ma with alpha -> alpha * (k+1)

# Li & Ma
return np.squeeze(sign * _li_and_ma(n_, b_, alpha))
idx_eq7 = (k_ > 0)
res[idx_eq7] = _li_and_ma(n_[idx_eq7], b_[idx_eq7], alpha_[idx_eq7] * (k_[idx_eq7] + 1))

if k > 0:
# Select elements where we need to apply eq. 9 from Vianello (2018)
idx_eq9 = (sigma_ > 0)

# Need to use eq. 7 from Vianello (2018), which is simply Li & Ma with alpha -> alpha * (k+1)
return np.squeeze(sign * _li_and_ma(n_, b_, alpha * (k+1)))
if np.any(idx_eq9):

if sigma > 0:
TS = _get_TS_by_numerical_optimization_v(n_[idx_eq9], b_[idx_eq9], alpha_[idx_eq9], sigma_[idx_eq9])

# Need to use eq. 9 from Vianello (2018)
res[idx_eq9] = np.sqrt(TS)

# The computation is not vectorized because there is an optimization involved, so we can only loop...
TS = np.array([_get_TS_by_numerical_optimization(n_[i], b_[i], alpha, sigma) for i in range(n_.shape[0])])
# Return significance

# Return significance
return np.squeeze(sign * res)


def z_bi_significance(n, b, alpha):
"""
Use the estimator Z_Bi from Cousins et al. 2008 to compute the significance
:param n: observed counts (can be an array)
:param b: expected background counts (can be an array)
:param alpha: ratio of the source observation efficiency and background observation efficiency (must be the same for
all items in n and b)
:return: the significance (z score) for the measurement(s)
"""

return np.squeeze(sign * np.sqrt(TS))
return z_bi_vectorized(n, b, alpha)
2 changes: 2 additions & 0 deletions gv_significance/significance_from_pvalue.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@ def significance_from_pvalue(pvalue):

raise ArithmeticError("One or more pvalues are too small for a significance computation.")

# This is equivalent to sqrt(2) * erfinv(1 - 2 * pvalue) but faster

return -scipy.special.ndtri(pvalue)
16 changes: 16 additions & 0 deletions gv_significance/size_one_or_n.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import numpy as np


def size_one_or_n(value, other_array, name):

value_ = np.array(value, dtype=float, ndmin=1)

if value_.shape[0] == 1:

value_ = np.zeros(other_array.shape[0], dtype=float) + value

else:

assert value_.shape[0] == other_array.shape[0], "The size of %s must be either 1 or the same size of n" % name

return value_
50 changes: 50 additions & 0 deletions gv_significance/test_formulae.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import pytest
import numpy as np

from poisson_poisson import significance as poi_poi_significance
from poisson_poisson import z_bi_significance as poi_poi_zbi

from poisson_gaussian import significance as poi_gau_significance
from poisson_gaussian import z_bi_significance as poi_gau_zbi

from ideal_case import significance as ideal_significance


def test_poi_poi_significance():

# Test with one number
s = poi_poi_significance(20, 80, 0.1, k=0, sigma=0)

s = poi_poi_significance(20, 80, 0.1, k=0.1, sigma=0)

s = poi_poi_significance(20, 80, 0.1, k=0, sigma=0.1)

assert np.isclose(poi_poi_zbi(140, 100, 1 / 1.2), 3.93, rtol=0.01)

# Test with arrays
s = poi_poi_significance([20, 30], [80, 90], 0.1)
s = poi_poi_significance([20, 30], [80, 90], [0.1, 0.2])

s = poi_poi_significance([20, 30], [80, 90], 0.1, k=0.1)
s = poi_poi_significance([20, 30], [80, 90], [0.1, 0.2], k=[0.1, 0.1])

s = poi_poi_significance([20, 30], [80, 90], 0.1, sigma=0.1)
s = poi_poi_significance([20, 30], [80, 90], [0.1, 0.2], sigma=[0.1, 0.1])

assert np.allclose(poi_poi_zbi([140, 140], [100, 100], [1 / 1.2, 1/1.2]), [3.93, 3.93], rtol=0.01)

# Test mixed
s = poi_poi_significance([20, 30, 40], [80, 90, 100], alpha=[0.1, 0.1, 0.1],
k=[0, 0.1, 0], sigma=[0, 0, 10])

def test_poi_gau_significance():

# Test with one number
s = poi_gau_significance(120, 80, 5.3)

assert np.isclose(poi_gau_zbi(140, 83.33, 8.333), 3.93, rtol=0.01)

# Test with vectors
s = poi_gau_significance([120, 130], [80, 90], [5.3, 5.3])

assert np.allclose(poi_gau_zbi([140, 140], [83.33, 83.33], [8.333, 8.333]), [3.93, 3.93], rtol=0.01)
3 changes: 2 additions & 1 deletion gv_significance/xlogy.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from math import log
import numpy as np


def xlogy(x, y):
"""
This function implements x * log(y) so that if both x and y are 0, the results is zero (and not infinite or nan
Expand Down Expand Up @@ -33,6 +34,6 @@ def xlogyv(x, y):

idx = (x != 0)

results[idx] = x[idx] * log(y[idx])
results[idx] = x[idx] * np.log(y[idx])

return np.squeeze(results)
Loading

0 comments on commit 39cda8f

Please sign in to comment.