Skip to content

Commit 69fcc18

Browse files
Added cut min date command (gotree prune date --min-date)
1 parent 1b33332 commit 69fcc18

File tree

6 files changed

+191
-45
lines changed

6 files changed

+191
-45
lines changed

cmd/prune.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,5 +150,5 @@ func init() {
150150
pruneCmd.Flags().StringVarP(&outtreefile, "output", "o", "stdout", "Output tree")
151151
pruneCmd.Flags().StringVarP(&tipfile, "tipfile", "f", "none", "Tip file")
152152
pruneCmd.Flags().BoolVarP(&revert, "revert", "r", false, "If true, then revert the behavior: will keep only species given in the command line, or keep only the species that are specific to the input tree, or keep only randomly selected taxa")
153-
pruneCmd.PersistentFlags().IntVar(&randomtips, "random", 0, "Number of tips to randomly sample")
153+
pruneCmd.Flags().IntVar(&randomtips, "random", 0, "Number of tips to randomly sample")
154154
}

cmd/prunedate.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
package cmd
2+
3+
import (
4+
goio "io"
5+
"os"
6+
7+
"github.com/evolbioinfo/gotree/io"
8+
"github.com/evolbioinfo/gotree/tree"
9+
"github.com/spf13/cobra"
10+
)
11+
12+
var pruneMinDate float64
13+
14+
// resolveCmd represents the resolve command
15+
var pruneDateCmd = &cobra.Command{
16+
Use: "date",
17+
Short: "Cut the input tree by keeping only parts in date window",
18+
Long: `Cut the input tree by keeping only parts in date window.
19+
20+
This command will extract part of the tree corresponding to >= min-date and <= max-date.
21+
22+
If min-date falls on an internal branch, it will create a new root node and will extract a tree starting at this node.
23+
If max-date falls on an internal branch, we do not take this part of the tree, and we remove branches that end into these cases.
24+
25+
`,
26+
RunE: func(cmd *cobra.Command, args []string) (err error) {
27+
var f *os.File
28+
var treefile goio.Closer
29+
var treechan <-chan tree.Trees
30+
var forest []*tree.Tree
31+
32+
if f, err = openWriteFile(outtreefile); err != nil {
33+
io.LogError(err)
34+
return
35+
}
36+
defer closeWriteFile(f, outtreefile)
37+
38+
if treefile, treechan, err = readTrees(intreefile); err != nil {
39+
io.LogError(err)
40+
return
41+
}
42+
defer treefile.Close()
43+
44+
for tr := range treechan {
45+
if tr.Err != nil {
46+
io.LogError(tr.Err)
47+
return tr.Err
48+
}
49+
if forest, err = tr.Tree.CutTreeMinDate(pruneMinDate); err != nil {
50+
io.LogError(err)
51+
return
52+
}
53+
for _, t := range forest {
54+
f.WriteString(t.Newick() + "\n")
55+
}
56+
}
57+
58+
return
59+
},
60+
}
61+
62+
func init() {
63+
pruneCmd.AddCommand(pruneDateCmd)
64+
pruneDateCmd.PersistentFlags().StringVarP(&intreefile, "input", "i", "stdin", "Input tree(s) file")
65+
pruneDateCmd.PersistentFlags().StringVarP(&outtreefile, "output", "o", "stdout", "Forest output file")
66+
pruneDateCmd.PersistentFlags().Float64Var(&pruneMinDate, "min-date", 0, "Minimum date to cut the tree")
67+
}

tree/dates.go

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ package tree
22

33
import (
44
"fmt"
5+
"regexp"
56
"sort"
7+
"strconv"
68

79
"github.com/evolbioinfo/gotree/io"
810
)
@@ -13,6 +15,47 @@ type LTTData struct {
1315
Y int // Number of lineages
1416
}
1517

18+
// Get Node dates
19+
// Returns a slice of float correspsponding to all node dates (internal and external)
20+
// Node IDs are their index in the slice.
21+
// If one node does not have date or a malformed date, returns an error
22+
func (t *Tree) NodeDates() (ndates []float64, err error) {
23+
var date float64
24+
var pattern *regexp.Regexp
25+
var matches []string
26+
27+
ndates = make([]float64, 0)
28+
pattern = regexp.MustCompile(`(?i)&date="(.+)"`)
29+
nnodes := 0
30+
t.PreOrder(func(cur *Node, prev *Node, e *Edge) (keep bool) {
31+
keep = true
32+
if cur.Id() != nnodes {
33+
err = fmt.Errorf("node id does not correspond to postorder traversal: %d vs %d", cur.Id(), nnodes)
34+
keep = false
35+
} else if len(cur.Comments()) > 0 {
36+
keep = false
37+
for _, c := range cur.Comments() {
38+
matches = pattern.FindStringSubmatch(c)
39+
if len(matches) < 2 {
40+
err = fmt.Errorf("no date found: %s", c)
41+
} else if date, err = strconv.ParseFloat(matches[1], 64); err != nil {
42+
err = fmt.Errorf("one of the node date is malformed: %s", c)
43+
} else {
44+
ndates = append(ndates, date)
45+
err = nil
46+
keep = true
47+
}
48+
}
49+
} else {
50+
err = fmt.Errorf("a node with no date found")
51+
keep = false
52+
}
53+
nnodes += 1
54+
return
55+
})
56+
return
57+
}
58+
1659
// LTTData describes a Lineage to Time data point
1760
func (t *Tree) LTT() (lttdata []LTTData) {
1861
var lttdatadup []LTTData
@@ -105,3 +148,76 @@ func (t *Tree) RTT() (rttdata []RTTData, err error) {
105148

106149
return
107150
}
151+
152+
// CutTreeMinDate traverses the tree, and only keep subtree starting at the given min date
153+
//
154+
// If a node has the exact same date as mindate: it becomes the root of a new tree
155+
// If a node has a date > mindate and its parent has a date < mindate: a new node is added as a the root of a new tree, with one child, the currrent node.
156+
// The output is a forest
157+
func (t *Tree) CutTreeMinDate(mindate float64) (forest []*Tree, err error) {
158+
var dates []float64
159+
forest = make([]*Tree, 0, 10)
160+
var tmpforest []*Tree
161+
162+
// If the field [&date=] exists, then takes it
163+
// Otherwise, returns an error
164+
if dates, err = t.NodeDates(); err != nil {
165+
io.LogWarning(err)
166+
err = fmt.Errorf("no dates provided in in the tree, of the form &date=")
167+
io.LogWarning(err)
168+
return
169+
}
170+
171+
if tmpforest, err = cutTreeMinDateRecur(t.Root(), nil, nil, mindate, dates); err != nil {
172+
return
173+
}
174+
forest = append(forest, tmpforest...)
175+
176+
return
177+
}
178+
179+
func cutTreeMinDateRecur(cur, prev *Node, e *Edge, mindate float64, dates []float64) (forest []*Tree, err error) {
180+
// We take the branches/nodes >= min-date
181+
var tmptree *Tree
182+
var tmpnode *Node
183+
var tmpedge *Edge
184+
var tmpforest []*Tree
185+
186+
forest = make([]*Tree, 0)
187+
// The current node is at the exact min date: we keep the subtree starting at this node
188+
// And disconnect the current node from its parent
189+
if dates[cur.Id()] == mindate || (prev == nil && dates[cur.Id()] >= mindate) {
190+
tmptree = NewTree()
191+
tmptree.SetRoot(cur)
192+
prev.delNeighbor(cur)
193+
cur.delNeighbor(prev)
194+
tmptree.ReinitIndexes()
195+
forest = append(forest, tmptree)
196+
return
197+
} else if prev != nil && dates[cur.Id()] > mindate && dates[prev.Id()] < mindate {
198+
tmptree = NewTree()
199+
tmpnode = tmptree.NewNode()
200+
tmptree.SetRoot(tmpnode)
201+
prev.delNeighbor(cur)
202+
cur.delNeighbor(prev)
203+
tmpedge = tmptree.ConnectNodes(tmpnode, cur)
204+
tmpnode.AddComment(fmt.Sprintf("&date=\"%f\"", mindate))
205+
tmpedge.SetLength(e.Length() * (dates[cur.Id()] - mindate) / (dates[cur.Id()] - dates[prev.Id()]))
206+
//tmptree.ReinitIndexes()
207+
forest = append(forest, tmptree)
208+
return
209+
}
210+
211+
edges := make([]*Edge, len(cur.Edges()))
212+
copy(edges, cur.Edges())
213+
neigh := make([]*Node, len(cur.neigh))
214+
copy(neigh, cur.neigh)
215+
for i, n := range neigh {
216+
if n != prev {
217+
tmpforest, err = cutTreeMinDateRecur(n, cur, edges[i], mindate, dates)
218+
forest = append(forest, tmpforest...)
219+
}
220+
}
221+
222+
return
223+
}

tree/node.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ func (n *Node) RotateNeighbors() {
231231
// from the current node
232232
func (n *Node) Newick(parent *Node, newick *bytes.Buffer) {
233233
if len(n.neigh) > 0 {
234-
if len(n.neigh) > 1 {
234+
if len(n.neigh) > 1 || parent == nil {
235235
newick.WriteString("(")
236236
}
237237
nbchild := 0
@@ -268,7 +268,7 @@ func (n *Node) Newick(parent *Node, newick *bytes.Buffer) {
268268
nbchild++
269269
}
270270
}
271-
if len(n.neigh) > 1 {
271+
if len(n.neigh) > 1 || parent == nil {
272272
newick.WriteString(")")
273273
}
274274
}

tree/stats.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,11 @@ func (t *Tree) MeanSupport() float64 {
7070
func (t *Tree) MedianSupport() float64 {
7171
edges := t.Edges()
7272
tips := t.Tips()
73-
supports := make([]float64, len(edges)-len(tips))
73+
nsup := len(edges) - len(tips)
74+
if nsup < 0 {
75+
nsup = 0
76+
}
77+
supports := make([]float64, nsup)
7478
if len(supports) == 0 {
7579
return math.NaN()
7680
}

tree/tree.go

Lines changed: 0 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -953,47 +953,6 @@ func (t *Tree) computeDepthUnRooted() {
953953
}
954954
}
955955

956-
// Get Node dates
957-
// Returns a slice of float correspsponding to all node dates (internal and external)
958-
// Node IDs are their index in the slice.
959-
// If one node does not have date or a malformed date, returns an error
960-
func (t *Tree) NodeDates() (ndates []float64, err error) {
961-
var date float64
962-
var pattern *regexp.Regexp
963-
var matches []string
964-
965-
ndates = make([]float64, 0)
966-
pattern = regexp.MustCompile(`(?i)&date="(.+)"`)
967-
nnodes := 0
968-
t.PreOrder(func(cur *Node, prev *Node, e *Edge) (keep bool) {
969-
keep = true
970-
if cur.Id() != nnodes {
971-
err = fmt.Errorf("node id does not correspond to postorder traversal: %d vs %d", cur.Id(), nnodes)
972-
keep = false
973-
} else if len(cur.Comments()) > 0 {
974-
keep = false
975-
for _, c := range cur.Comments() {
976-
matches = pattern.FindStringSubmatch(c)
977-
if len(matches) < 2 {
978-
err = fmt.Errorf("no date found: %s", c)
979-
} else if date, err = strconv.ParseFloat(matches[1], 64); err != nil {
980-
err = fmt.Errorf("one of the node date is malformed: %s", c)
981-
} else {
982-
ndates = append(ndates, date)
983-
err = nil
984-
keep = true
985-
}
986-
}
987-
} else {
988-
err = fmt.Errorf("a node with no date found")
989-
keep = false
990-
}
991-
nnodes += 1
992-
return
993-
})
994-
return
995-
}
996-
997956
// Computes distance of all nodes to root / pseudo root.
998957
// Indices of the nodes in the rdists slice correspond to node ids
999958
func (t *Tree) NodeRootDistance() (rdists []float64) {

0 commit comments

Comments
 (0)