diff --git a/changelog.md b/changelog.md index 61bdfe21..50a9f0d4 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,16 @@ +# 0.7.3 -- bug fix in average rate calculation + +This release fixes a problem that surfaced when inferring GTR models from trees of very similar sequences but with quite a few gaps. This resulted in mutation counts like so: + +A: [[ 0. 1. 8. 3. 0.] +C: [ 1. 0. 2. 7. 0.] +G: [ 9. 0. 0. 2. 0.] +T: [ 1. 23. 6. 0. 0.] +-: [46. 22. 28. 38. 0.]] + +As a result, the rate "to gap" is inferred to be quite high, while the equilibrium gap fraction is low. Since we cap the equilibrium gap fraction from below to avoid reconstruction problems when branches are very short, this resulted in an average rate that had a substantial contribution from an assumed 1% equilibrium gap frequency where gaps mutate at 20 times the rate of other states. Since gaps are ignored in distance calculations anyway, it is more sensible to exclude these transitions from the calculation of the average rate. This is now happening in line 7 of treetime/gtr.py. The average rate is restricted to substitutions from non-gap states to any state. + + # 0.7.2 -- weights in discrete trait reconstruction This release implements a more consistent handling of weights (fixed equilibrium frequencies) in discrete state reconstruction. It also fixes a number of problems in who the arguments were processed. 
diff --git a/treetime/__init__.py b/treetime/__init__.py index 75371395..a5d7f8e1 100644 --- a/treetime/__init__.py +++ b/treetime/__init__.py @@ -1,5 +1,5 @@ from __future__ import print_function, division, absolute_import -version="0.7.2" +version="0.7.3" class TreeTimeError(Exception): """TreeTimeError class""" diff --git a/treetime/wrappers.py b/treetime/wrappers.py index cf5c2a02..206cc9ac 100644 --- a/treetime/wrappers.py +++ b/treetime/wrappers.py @@ -738,6 +738,7 @@ def reconstruct_discrete_traits(tree, traits, missing_data='?', pc=1.0, sampling raise TreeTimeError("More than half of discrete states missing from the weights file") unique_states=sorted(unique_states) + # note that gap character '-' is chr(45) and will never be included here alphabet = [chr(65+i) for i,state in enumerate(unique_states) if state!=missing_data] letter_to_state = {a:unique_states[i] for i,a in enumerate(alphabet)}