Skip to content

Commit

Permalink
Merge pull request #3 from Ciela-Institute/bootstrapping
Browse files Browse the repository at this point in the history
Update to version 0.1
  • Loading branch information
Pablo-Lemos authored Nov 21, 2023
2 parents ebbffce + ad55f6d commit 8de0760
Show file tree
Hide file tree
Showing 6 changed files with 377 additions and 17 deletions.
203 changes: 203 additions & 0 deletions notebooks/test.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "tarp"
version = "0.0.2"
version = "0.1.0"
authors = [
{name = "Adam Coogan", email = "dr.adam.coogan@gmail.com"},
{name = "Pablo Lemos", email = "plemos91@gmail.com"},
Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ Sphinx==5.3.0
sphinx_autodoc_typehints==1.22
sphinx-pyproject==0.1.0
torch==1.13.1
tqdm==4.65.0
deprecation==2.1.0
2 changes: 1 addition & 1 deletion src/tarp/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .drp import *
from .drp import get_drp_coverage, get_tarp_coverage
136 changes: 128 additions & 8 deletions src/tarp/drp.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,43 @@
from typing import Tuple, Union

import numpy as np
from tqdm import tqdm
import deprecation


__all__ = ("get_drp_coverage",)
__all__ = ("get_tarp_coverage", "get_drp_coverage")


@deprecation.deprecated(
deprecated_in="0.1.0",
removed_in="0.2.0",
current_version="0.1.0",
details="Use get_tarp_coverage instead",
)
def get_drp_coverage(
samples: np.ndarray,
theta: np.ndarray,
references: Union[str, np.ndarray] = "random",
metric: str = "euclidean",
) -> Tuple[np.ndarray, np.ndarray]:
return get_tarp_coverage(samples=samples,
theta=theta,
references=references,
metric=metric,
norm=True,
bootstrap=False,
seed=None)


def _get_tarp_coverage_single(
samples: np.ndarray,
theta: np.ndarray,
references: Union[str, np.ndarray] = "random",
metric: str = "euclidean",
norm: bool = True,
seed: Union[int, None] = None
) -> Tuple[np.ndarray, np.ndarray]:
"""
Estimates coverage with the distance to random point method.
Estimates coverage with the TARP method a single time.
Reference: `Lemos, Coogan et al 2023 <https://arxiv.org/abs/2302.03026>`_
Expand All @@ -25,10 +50,13 @@ def get_drp_coverage(
the parameter space.
metric: the metric to use when computing the distance. Can be ``"euclidean"`` or
``"manhattan"``.
norm : whether to apply or not the normalization (Default = True)
seed: the seed to use for the random number generator. If ``None``, then no seed
Returns:
Credibility values (``alpha``) and expected coverage probability (``ecp``).
Expected coverage probability (``ecp``) and credibility values (``alpha``)
"""
np.random.seed(seed)

# Check that shapes are correct
if samples.ndim != 3:
Expand All @@ -51,10 +79,11 @@ def get_drp_coverage(
theta = theta[np.newaxis, :, :]

# Normalize
low = np.min(theta, axis=1, keepdims=True)
high = np.max(theta, axis=1, keepdims=True)
samples = (samples - low) / (high - low + 1e-10)
theta = (theta - low) / (high - low + 1e-10)
if norm:
low = np.min(theta, axis=1, keepdims=True)
high = np.max(theta, axis=1, keepdims=True)
samples = (samples - low) / (high - low + 1e-10)
theta = (theta - low) / (high - low + 1e-10)

# Generate reference points
if isinstance(references, str) and references == "random":
Expand Down Expand Up @@ -92,3 +121,94 @@ def get_drp_coverage(
dx = alpha[1] - alpha[0]
ecp = np.cumsum(h) * dx
return ecp, alpha[1:]


def _get_tarp_coverage_bootstrap(samples: np.ndarray,
theta: np.ndarray,
references: Union[str, np.ndarray] = "random",
metric: str = "euclidean",
norm: bool = True,
seed: Union[int, None] = None
) -> Tuple[np.ndarray, np.ndarray]:
"""
Estimates uncertainties on the expected probability and credibility values calculated with the
_get_tarp_coverage_single function using the bootstrapping method
Args:
samples: the samples to compute the coverage of, with shape ``(n_samples, n_sims, n_dims)``.
theta: the true parameter values for each samples, with shape ``(n_sims, n_dims)``.
references: the reference points to use for the DRP regions, with shape
``(n_references, n_sims)``, or the string ``"random"``. If the later, then
the reference points are chosen randomly from the unit hypercube over
the parameter space.
metric: the metric to use when computing the distance. Can be ``"euclidean"`` or
``"manhattan"``.
norm : whether to apply or not the normalization (Default = True)
seed: the seed to use for the random number generator. If ``None``, then no seed
Returns:
Mean and std of the expected coverage probability, and mean of the credibility
"""
num_sims = samples.shape[1]

boot_ecp = np.empty(shape=(num_sims, num_sims//10))
boot_alpha = np.empty(shape=(num_sims, num_sims//10))
for i in tqdm(range(num_sims)):
idx_remove = np.random.randint(num_sims)
idx_add = np.random.randint(num_sims)

# Replacing one simulation and its samples by another and its associated samples
samples[:, idx_remove, :] = samples[:, idx_add, :]
theta[idx_remove, :] = theta[idx_add, :]

boot_ecp[i, :], boot_alpha[i, :] = _get_tarp_coverage_single(samples,
theta,
references=references,
metric=metric,
norm=norm,
seed=seed)

# ecp_mean = boot_ecp.mean(axis=0)
# ecp_std = boot_ecp.std(axis=0)
# alpha_mean = boot_alpha.mean(axis=0)
# return ecp_mean, ecp_std, alpha_mean
return boot_ecp, boot_alpha


def get_tarp_coverage(
samples: np.ndarray,
theta: np.ndarray,
references: Union[str, np.ndarray] = "random",
metric: str = "euclidean",
norm: bool = False,
bootstrap: bool = False,
seed: Union[int, None] = None
) -> Tuple[np.ndarray, np.ndarray]:
"""
Estimates coverage with the TARP method.
Reference: `Lemos, Coogan et al 2023 <https://arxiv.org/abs/2302.03026>`_
Args:
samples: the samples to compute the coverage of, with shape ``(n_samples, n_sims, n_dims)``.
theta: the true parameter values for each samples, with shape ``(n_sims, n_dims)``.
references: the reference points to use for the DRP regions, with shape
``(n_references, n_sims)``, or the string ``"random"``. If the later, then
the reference points are chosen randomly from the unit hypercube over
the parameter space.
metric: the metric to use when computing the distance. Can be ``"euclidean"`` or
``"manhattan"``.
norm : whether to apply or not the normalization (Default = False)
bootstrap : whether to use bootstrap to estimate uncertainties (Default = False)
seed: the seed to use for the random number generator. If ``None``, then no seed
Returns:
Expected coverage probability (``ecp``) and credibility values (``alpha``).
If bootstrap is True, both arrays have an extra dimension corresponding to the number of bootstrap iterations
"""
if bootstrap:
ecp, alpha = _get_tarp_coverage_bootstrap(samples, theta, references, metric, norm, seed)
else:
ecp, alpha = _get_tarp_coverage_single(samples, theta, references, metric, norm, seed)
return ecp, alpha

49 changes: 42 additions & 7 deletions tests/test_drp.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import unittest
import numpy as np
from tarp import get_tarp_coverage

from tarp import get_drp_coverage


def test():
num_samples = 10
num_sims = 10
def get_test_data():
num_samples = 100
num_sims = 100
num_dims = 5
theta = np.random.uniform(low=-5, high=5, size=(num_sims, num_dims))
log_sigma = np.random.uniform(low=-5, high=-1, size=(num_sims, num_dims))
Expand All @@ -14,8 +14,43 @@ def test():
loc=theta, scale=sigma, size=(num_samples, num_sims, num_dims)
)
theta = np.random.normal(loc=theta, scale=sigma, size=(num_sims, num_dims))
get_drp_coverage(samples, theta, references="random", metric="euclidean")
return samples, theta


class TarpTest(unittest.TestCase):
def test_single(self):
samples, theta = get_test_data()
ecp, alpha = get_tarp_coverage(samples,
theta,
references="random",
metric="euclidean",
norm=False,
bootstrap=False)
self.assertAlmostEqual(np.max(np.abs(ecp - alpha)), 0., delta=0.25)

def test_norm(self):
samples, theta = get_test_data()
ecp, alpha = get_tarp_coverage(samples,
theta,
references="random",
metric="euclidean",
norm=True,
bootstrap=False)
self.assertAlmostEqual(np.max(np.abs(ecp - alpha)), 0., delta=0.25)

def test_bootstrap(self):
samples, theta = get_test_data()
ecp, alpha = get_tarp_coverage(samples,
theta,
references="random",
metric="euclidean",
norm=False,
bootstrap=True)
ecp_mean = np.mean(ecp, axis=0)
ecp_std = np.std(ecp, axis=0)
alpha = np.mean(alpha, axis=0)
self.assertAlmostEqual(np.max(np.abs(ecp_mean - alpha)/ecp_std), 0., delta=10.)


if __name__ == "__main__":
test()
unittest.main()

0 comments on commit 8de0760

Please sign in to comment.