diff --git a/docs/methods.md b/docs/methods.md index ce2d3158..ee471c35 100644 --- a/docs/methods.md +++ b/docs/methods.md @@ -46,6 +46,12 @@ input_ts = tskit.load("data/basic_example.trees") ts = tsdate.date(input_ts, method="variational_gamma", population_size=100, mutation_rate=1e-8) ``` +Alternatively each method can be called directly as a separate function: + +```{code-cell} ipython3 +ts = tsdate.variational_gamma(input_ts, population_size=100, mutation_rate=1e-8) +``` + Currently the default is `inside_outside`, but this may change in future releases. diff --git a/tests/test_functions.py b/tests/test_functions.py index 776db07b..cbf5da68 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -1630,7 +1630,10 @@ def test_node_metadata_simulated_tree(self): larger_ts, mutation_rate=None, population_size=10000 ) mn_post, *_ = algorithm.run( - eps=1e-6, outside_standardize=True, probability_space=tsdate.base.LOG + eps=1e-6, + outside_standardize=True, + ignore_oldest_root=False, + probability_space=tsdate.base.LOG, ) dated_ts = date(larger_ts, population_size=10000, mutation_rate=None) metadata = dated_ts.tables.nodes.metadata @@ -1846,7 +1849,10 @@ def test_sites_time_insideoutside(self): dated = tsdate.date(ts, mutation_rate=None, population_size=1) algorithm = InsideOutsideMethod(ts, mutation_rate=None, population_size=1) mn_post, *_ = algorithm.run( - eps=1e-6, outside_standardize=True, probability_space=tsdate.base.LOG + eps=1e-6, + outside_standardize=True, + ignore_oldest_root=False, + probability_space=tsdate.base.LOG, ) assert np.array_equal( mn_post[ts.tables.mutations.node], @@ -1953,7 +1959,12 @@ def test_sites_time_simulated(self): algorithm = InsideOutsideMethod( larger_ts, mutation_rate=None, population_size=10000 ) - mn_post, *_ = algorithm.run(eps=1e-6, outside_standardize=True) + mn_post, *_ = algorithm.run( + eps=1e-6, + outside_standardize=True, + ignore_oldest_root=False, + probability_space=tsdate.base.LOG, + ) dated = date(larger_ts, mutation_rate=None, population_size=10000) assert np.allclose( mn_post[larger_ts.tables.mutations.node], diff --git a/tsdate/core.py b/tsdate/core.py index 6ce00b2d..1dd36fce 100644 --- a/tsdate/core.py +++ b/tsdate/core.py @@ -768,7 +768,7 @@ def outside_pass( to convert to probabilities, call posterior.to_probabilities() Standardizing *during* the outside process may be necessary if there is - overflow, but means that we cannot check the total functional value at each node + overflow, but means that we cannot check the total functional value at each node Ignoring the oldest root may also be necessary when the oldest root node causes numerical stability issues. @@ -1344,8 +1344,8 @@ def run( self, eps, outside_standardize, - ignore_oldest_root=None, - probability_space=None, + ignore_oldest_root, + probability_space, num_threads=None, cache_inside=None, ): @@ -1486,8 +1486,10 @@ def maximization( *, eps=None, num_threads=None, - cache_inside=None, probability_space=None, + # below deliberately undocumented + cache_inside=None, + # Other params documented in `.date()` **kwargs, ): """ @@ -1529,10 +1531,6 @@ def maximization( Default: None, treated as 1e-6. :param int num_threads: The number of threads to use when precalculating likelihoods. A simpler unthreaded algorithm is used unless this is >= 1. Default: None - :param bool ignore_oldest_root: Should the oldest root in the tree sequence be - ignored in the outside algorithm (if ``"inside_outside"`` is used as the method). - Ignoring outside root provides greater stability when dating tree sequences - inferred from real data. Default: False :param string probability_space: Should the internal algorithm save probabilities in "logarithmic" (slower, less liable to to overflow) or "linear" space (fast, may overflow). Default: None treated as"logarithmic" @@ -1569,12 +1567,14 @@ def maximization( def inside_outside( tree_sequence, *, - eps=1e-6, + eps=None, num_threads=None, - outside_standardize=True, - ignore_oldest_root=False, - cache_inside=False, + outside_standardize=None, + ignore_oldest_root=None, probability_space=None, + # below deliberately undocumented + cache_inside=False, + # Other params documented in `.date()` **kwargs, ): """ @@ -1613,13 +1613,20 @@ def inside_outside( :param ~tskit.TreeSequence tree_sequence: The input tree sequence to be dated. :param float eps: The error factor in time difference calculations, and the minimum distance separating parent and child ages in the returned tree sequence. - Default: 1e-6. + Default: None, treated as 1e-6. :param int num_threads: The number of threads to use when precalculating likelihoods. A simpler unthreaded algorithm is used unless this is >= 1. Default: None + :param bool outside_standardize: Should the likelihoods be standardized during the + outside step? This can help to avoid numerical under/overflow. Using + unstandardized values is mostly useful for testing (e.g. to obtain, in the + outside step, the total functional value for each node). + Default: None, treated as True. :param bool ignore_oldest_root: Should the oldest root in the tree sequence be ignored in the outside algorithm (if ``"inside_outside"`` is used as the method). - Ignoring outside root provides greater stability when dating tree sequences - inferred from real data. Default: False + Ignoring outside root can provide greater stability when dating tree sequences + inferred from real data, in particular if all local trees are assumed to coalesce + in a single "grand MRCA", as in older versions of ``tsinfer``. + Default: None, treated as False. :param string probability_space: Should the internal algorithm save probabilities in "logarithmic" (slower, less liable to to overflow) or "linear" space (fast, may overflow). Default: "logarithmic" @@ -1650,6 +1657,10 @@ def inside_outside( eps = 1e-6 if probability_space is None: probability_space = base.LOG + if outside_standardize is None: + outside_standardize = True + if ignore_oldest_root is None: + ignore_oldest_root = False dating_method = InsideOutsideMethod(tree_sequence, **kwargs) result = dating_method.run( eps=eps, @@ -1759,14 +1770,14 @@ def variational_gamma( "variational_gamma": variational_gamma, } """ -The names of available estimation methods, mapped to the function to carry -out each estimation method. Names can be passed as strings to the +The names of available estimation methods, each mapped to a function to carry +out the appropriate method. Names can be passed as strings to the :func:`~tsdate.date` function, or each named function can be called directly: -* :func:`tsdate.inside_outside` (empirically better, theoretically problematic) -* :func:`tsdate.maximization` (worse empirically, especially with gamma approximated - priors, but theoretically robust) -* :func:`tsdate.variational_gamma` (variational approximation, empirically most accurate) +* :func:`tsdate.inside_outside`: empirically better, theoretically problematic. +* :func:`tsdate.maximization`: worse empirically, especially with gamma approximated + priors, but theoretically robust +* :func:`tsdate.variational_gamma`: variational approximation, empirically most accurate. """