Skip to content

Commit

Permalink
Merge pull request #353 from hyanwong/API-extras
Browse files Browse the repository at this point in the history
Tidy the documentation and default values
  • Loading branch information
hyanwong authored Jan 5, 2024
2 parents 376e157 + f239842 commit 34c44b7
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 24 deletions.
6 changes: 6 additions & 0 deletions docs/methods.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ input_ts = tskit.load("data/basic_example.trees")
ts = tsdate.date(input_ts, method="variational_gamma", population_size=100, mutation_rate=1e-8)
```

Alternatively each method can be called directly as a separate function:

```{code-cell} ipython3
ts = tsdate.variational_gamma(input_ts, population_size=100, mutation_rate=1e-8)
```

Currently the default is `inside_outside`, but this may change in future releases.


Expand Down
17 changes: 14 additions & 3 deletions tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1630,7 +1630,10 @@ def test_node_metadata_simulated_tree(self):
larger_ts, mutation_rate=None, population_size=10000
)
mn_post, *_ = algorithm.run(
eps=1e-6, outside_standardize=True, probability_space=tsdate.base.LOG
eps=1e-6,
outside_standardize=True,
ignore_oldest_root=False,
probability_space=tsdate.base.LOG,
)
dated_ts = date(larger_ts, population_size=10000, mutation_rate=None)
metadata = dated_ts.tables.nodes.metadata
Expand Down Expand Up @@ -1846,7 +1849,10 @@ def test_sites_time_insideoutside(self):
dated = tsdate.date(ts, mutation_rate=None, population_size=1)
algorithm = InsideOutsideMethod(ts, mutation_rate=None, population_size=1)
mn_post, *_ = algorithm.run(
eps=1e-6, outside_standardize=True, probability_space=tsdate.base.LOG
eps=1e-6,
outside_standardize=True,
ignore_oldest_root=False,
probability_space=tsdate.base.LOG,
)
assert np.array_equal(
mn_post[ts.tables.mutations.node],
Expand Down Expand Up @@ -1953,7 +1959,12 @@ def test_sites_time_simulated(self):
algorithm = InsideOutsideMethod(
larger_ts, mutation_rate=None, population_size=10000
)
mn_post, *_ = algorithm.run(eps=1e-6, outside_standardize=True)
mn_post, *_ = algorithm.run(
eps=1e-6,
outside_standardize=True,
ignore_oldest_root=False,
probability_space=tsdate.base.LOG,
)
dated = date(larger_ts, mutation_rate=None, population_size=10000)
assert np.allclose(
mn_post[larger_ts.tables.mutations.node],
Expand Down
53 changes: 32 additions & 21 deletions tsdate/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,7 @@ def outside_pass(
to convert to probabilities, call posterior.to_probabilities()
Standardizing *during* the outside process may be necessary if there is
overflow, but means that we cannot check the total functional value at each node
overflow, but means that we cannot check the total functional value at each node
Ignoring the oldest root may also be necessary when the oldest root node
causes numerical stability issues.
Expand Down Expand Up @@ -1344,8 +1344,8 @@ def run(
self,
eps,
outside_standardize,
ignore_oldest_root=None,
probability_space=None,
ignore_oldest_root,
probability_space,
num_threads=None,
cache_inside=None,
):
Expand Down Expand Up @@ -1486,8 +1486,10 @@ def maximization(
*,
eps=None,
num_threads=None,
cache_inside=None,
probability_space=None,
# below deliberately undocumented
cache_inside=None,
# Other params documented in `.date()`
**kwargs,
):
"""
Expand Down Expand Up @@ -1529,10 +1531,6 @@ def maximization(
Default: None, treated as 1e-6.
:param int num_threads: The number of threads to use when precalculating likelihoods.
A simpler unthreaded algorithm is used unless this is >= 1. Default: None
:param bool ignore_oldest_root: Should the oldest root in the tree sequence be
ignored in the outside algorithm (if ``"inside_outside"`` is used as the method).
Ignoring outside root provides greater stability when dating tree sequences
inferred from real data. Default: False
:param string probability_space: Should the internal algorithm save
probabilities in "logarithmic" (slower, less liable to to overflow) or
"linear" space (fast, may overflow). Default: None treated as"logarithmic"
Expand Down Expand Up @@ -1569,12 +1567,14 @@ def maximization(
def inside_outside(
tree_sequence,
*,
eps=1e-6,
eps=None,
num_threads=None,
outside_standardize=True,
ignore_oldest_root=False,
cache_inside=False,
outside_standardize=None,
ignore_oldest_root=None,
probability_space=None,
# below deliberately undocumented
cache_inside=False,
# Other params documented in `.date()`
**kwargs,
):
"""
Expand Down Expand Up @@ -1613,13 +1613,20 @@ def inside_outside(
:param ~tskit.TreeSequence tree_sequence: The input tree sequence to be dated.
:param float eps: The error factor in time difference calculations, and the
minimum distance separating parent and child ages in the returned tree sequence.
Default: 1e-6.
Default: None, treated as 1e-6.
:param int num_threads: The number of threads to use when precalculating likelihoods.
A simpler unthreaded algorithm is used unless this is >= 1. Default: None
:param bool outside_standardize: Should the likelihoods be standardized during the
outside step? This can help to avoid numerical under/overflow. Using
unstandardized values is mostly useful for testing (e.g. to obtain, in the
outside step, the total functional value for each node).
Default: None, treated as True.
:param bool ignore_oldest_root: Should the oldest root in the tree sequence be
ignored in the outside algorithm (if ``"inside_outside"`` is used as the method).
Ignoring outside root provides greater stability when dating tree sequences
inferred from real data. Default: False
Ignoring outside root can provide greater stability when dating tree sequences
inferred from real data, in particular if all local trees are assumed to coalesce
in a single "grand MRCA", as in older versions of ``tsinfer``.
Default: None, treated as False.
:param string probability_space: Should the internal algorithm save
probabilities in "logarithmic" (slower, less liable to to overflow) or
"linear" space (fast, may overflow). Default: "logarithmic"
Expand Down Expand Up @@ -1650,6 +1657,10 @@ def inside_outside(
eps = 1e-6
if probability_space is None:
probability_space = base.LOG
if outside_standardize is None:
outside_standardize = True
if ignore_oldest_root is None:
ignore_oldest_root = False
dating_method = InsideOutsideMethod(tree_sequence, **kwargs)
result = dating_method.run(
eps=eps,
Expand Down Expand Up @@ -1759,14 +1770,14 @@ def variational_gamma(
"variational_gamma": variational_gamma,
}
"""
The names of available estimation methods, mapped to the function to carry
out each estimation method. Names can be passed as strings to the
The names of available estimation methods, each mapped to a function to carry
out the appropriate method. Names can be passed as strings to the
:func:`~tsdate.date` function, or each named function can be called directly:
* :func:`tsdate.inside_outside` (empirically better, theoretically problematic)
* :func:`tsdate.maximization` (worse empirically, especially with gamma approximated
priors, but theoretically robust)
* :func:`tsdate.variational_gamma` (variational approximation, empirically most accurate)
* :func:`tsdate.inside_outside`: empirically better, theoretically problematic.
* :func:`tsdate.maximization`: worse empirically, especially with gamma approximated
priors, but theoretically robust
* :func:`tsdate.variational_gamma`: variational approximation, empirically most accurate.
"""


Expand Down

0 comments on commit 34c44b7

Please sign in to comment.