Skip to content

Commit aa38338

Browse files
GertjanBisschop and mergify[bot]
authored and committed
Edge array Python API
1 parent fb67c48 commit aa38338

File tree

8 files changed

+87
-5
lines changed

8 files changed

+87
-5
lines changed

docs/data-model.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -906,7 +906,7 @@ from IPython.display import HTML
906906
def html_quintuple_table(ts, show_virtual_root=False, show_convenience_arrays=False):
907907
tree = ts.first()
908908
columns = ["node", "parent", "left_child", "right_child", "left_sib", "right_sib"]
909-
convenience_arrays = ["num_children"]
909+
convenience_arrays = ["num_children", "edge"]
910910
if show_convenience_arrays:
911911
columns += convenience_arrays
912912
data = {k:[] for k in columns}
@@ -965,7 +965,7 @@ information on each node in the tree. These arrays are not essential to
965965
represent the trees within a treesequence. However, they can be useful for
966966
specific algorithms (e.g. when computing tree (im)balance metrics). The
967967
convenience arrays that have been implemented are:
968-
{attr}`Tree.num_children_array`.
968+
{attr}`Tree.num_children_array`, {attr}`Tree.edge_array`.
969969

970970
Adding convenience arrays to the example above results in this table:
971971

docs/python-api.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,7 @@ Node information
445445
Tree.right_child
446446
Tree.left_child
447447
Tree.children
448+
Tree.edge
448449
449450
Descendant nodes
450451
.. autosummary::
@@ -479,6 +480,7 @@ high performance interface which can be used in conjunction with the equivalent
479480
Tree.left_sib_array
480481
Tree.right_sib_array
481482
Tree.num_children_array
483+
Tree.edge_array
482484
```
483485

484486

python/CHANGELOG.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@
7272
and maintainability. Please open an issue if this affects your application.
7373
(:user:`jeromekelleher`, :user:`benjeffery`, :pr:`2120`).
7474

75+
- Add ``Tree.edge_array`` and ``Tree.edge``, which return the id of the edge encoding
76+
the relationship between each node and its parent.
77+
(:user:`GertjanBisschop`, :issue:`2361`, :pr:`2357`)
78+
7579
**Breaking Changes**
7680

7781
- The JSON metadata codec now interprets the empty string as an empty object. This means

python/_tskitmodule.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10311,6 +10311,22 @@ Tree_get_right_sib(Tree *self, PyObject *args)
1031110311
return ret;
1031210312
}
1031310313

10314+
static PyObject *
10315+
Tree_get_edge(Tree *self, PyObject *args)
10316+
{
10317+
PyObject *ret = NULL;
10318+
tsk_id_t edge_id;
10319+
int node;
10320+
10321+
if (Tree_get_node_argument(self, args, &node) != 0) {
10322+
goto out;
10323+
}
10324+
edge_id = self->tree->edge[node];
10325+
ret = Py_BuildValue("i", (int) edge_id);
10326+
out:
10327+
return ret;
10328+
}
10329+
1031410330
static PyObject *
1031510331
Tree_get_children(Tree *self, PyObject *args)
1031610332
{
@@ -11052,6 +11068,19 @@ Tree_get_num_children_array(Tree *self, void *closure)
1105211068
return ret;
1105311069
}
1105411070

11071+
static PyObject *
11072+
Tree_get_edge_array(Tree *self, void *closure)
11073+
{
11074+
PyObject *ret = NULL;
11075+
11076+
if (Tree_check_state(self) != 0) {
11077+
goto out;
11078+
}
11079+
ret = Tree_make_array(self, NPY_INT32, self->tree->edge);
11080+
out:
11081+
return ret;
11082+
}
11083+
1105511084
static PyGetSetDef Tree_getsetters[]
1105611085
= { { .name = "parent_array",
1105711086
.get = (getter) Tree_get_parent_array,
@@ -11071,6 +11100,9 @@ static PyGetSetDef Tree_getsetters[]
1107111100
{ .name = "num_children_array",
1107211101
.get = (getter) Tree_get_num_children_array,
1107311102
.doc = "The num_children array in the quintuply linked tree." },
11103+
{ .name = "edge_array",
11104+
.get = (getter) Tree_get_edge_array,
11105+
.doc = "The edge array in the quintuply linked tree." },
1107411106
{ NULL } };
1107511107

1107611108
static PyMethodDef Tree_methods[] = {
@@ -11182,6 +11214,10 @@ static PyMethodDef Tree_methods[] = {
1118211214
.ml_meth = (PyCFunction) Tree_get_right_sib,
1118311215
.ml_flags = METH_VARARGS,
1118411216
.ml_doc = "Returns the right-most sib of node u" },
11217+
{ .ml_name = "get_edge",
11218+
.ml_meth = (PyCFunction) Tree_get_edge,
11219+
.ml_flags = METH_VARARGS,
11220+
.ml_doc = "Returns the edge id connecting node u to its parent" },
1118511221
{ .ml_name = "get_children",
1118611222
.ml_meth = (PyCFunction) Tree_get_children,
1118711223
.ml_flags = METH_VARARGS,

python/tests/test_highlevel.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2364,7 +2364,8 @@ def modify(ts, func):
23642364
def test_tree_node_edges(self):
23652365
for ts in get_example_tree_sequences():
23662366
edge_visited = np.zeros(ts.num_edges, dtype=bool)
2367-
for mapping, tree in zip(ts._tree_node_edges(), ts.trees()):
2367+
for tree in ts.trees():
2368+
mapping = tree.edge_array
23682369
node_mapped = mapping >= 0
23692370
edge_visited[mapping[node_mapped]] = True
23702371
# Note that tree.nodes() does not necessarily list all the nodes
@@ -3711,13 +3712,15 @@ def verify_tree_arrays(self, tree):
37113712
assert tree.left_sib_array.shape == (N,)
37123713
assert tree.right_sib_array.shape == (N,)
37133714
assert tree.num_children_array.shape == (N,)
3715+
assert tree.edge_array.shape == (N,)
37143716
for u in range(N):
37153717
assert tree.parent(u) == tree.parent_array[u]
37163718
assert tree.left_child(u) == tree.left_child_array[u]
37173719
assert tree.right_child(u) == tree.right_child_array[u]
37183720
assert tree.left_sib(u) == tree.left_sib_array[u]
37193721
assert tree.right_sib(u) == tree.right_sib_array[u]
37203722
assert tree.num_children(u) == tree.num_children_array[u]
3723+
assert tree.edge(u) == tree.edge_array[u]
37213724

37223725
def verify_tree_arrays_python_ts(self, ts):
37233726
pts = tests.PythonTreeSequence(ts)
@@ -3730,6 +3733,7 @@ def verify_tree_arrays_python_ts(self, ts):
37303733
assert np.all(st1.left_sib_array == st2.left_sib)
37313734
assert np.all(st1.right_sib_array == st2.right_sib)
37323735
assert np.all(st1.num_children_array == st2.num_children)
3736+
assert np.all(st1.edge_array == st2.edge)
37333737

37343738
def test_tree_arrays(self):
37353739
ts = msprime.simulate(10, recombination_rate=1, random_seed=1)
@@ -3747,6 +3751,7 @@ def test_tree_arrays(self):
37473751
"left_sib",
37483752
"right_sib",
37493753
"num_children",
3754+
"edge",
37503755
],
37513756
)
37523757
def test_tree_array_properties(self, array):
@@ -3770,6 +3775,7 @@ def verify_empty_tree(self, tree):
37703775
assert tree.left_child(u) == tskit.NULL
37713776
assert tree.right_child(u) == tskit.NULL
37723777
assert tree.num_children(u) == 0
3778+
assert tree.edge(u) == tskit.NULL
37733779
if not ts.node(u).is_sample():
37743780
assert tree.left_sib(u) == tskit.NULL
37753781
assert tree.right_sib(u) == tskit.NULL
@@ -3817,6 +3823,7 @@ def verify_trees_identical(self, t1, t2):
38173823
assert np.all(t1.left_sib_array == t2.left_sib_array)
38183824
assert np.all(t1.right_sib_array == t2.right_sib_array)
38193825
assert np.all(t1.num_children_array == t2.num_children_array)
3826+
assert np.all(t1.edge_array == t2.edge_array)
38203827
assert list(t1.sites()) == list(t2.sites())
38213828

38223829
def test_copy_seek(self):
@@ -3924,7 +3931,8 @@ def test_node_edges(self):
39243931
for tree in ts.trees():
39253932
nodes = set(tree.nodes())
39263933
midpoint = sum(tree.interval) / 2
3927-
mapping = tree._node_edges()
3934+
# mapping = tree._node_edges()
3935+
mapping = tree.edge_array
39283936
for node, edge in enumerate(mapping):
39293937
if node in nodes and tree.parent(node) != tskit.NULL:
39303938
edge_above_node = np.where(

python/tests/test_lowlevel.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2684,6 +2684,7 @@ class TestTree(LowLevelTestCase):
26842684
"left_sib",
26852685
"right_sib",
26862686
"num_children",
2687+
"edge",
26872688
]
26882689

26892690
def test_options(self):
@@ -3036,6 +3037,7 @@ def check_tree(tree):
30363037
assert tree.get_left_child(u) == _tskit.NULL
30373038
assert tree.get_right_child(u) == _tskit.NULL
30383039
assert tree.get_num_children(u) == 0
3040+
assert tree.get_edge(u) == _tskit.NULL
30393041

30403042
tree = _tskit.Tree(ts)
30413043
check_tree(tree)

python/tskit/drawing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1174,7 +1174,7 @@ def __init__(
11741174
tree_right = tree.interval.right
11751175
edge_left = ts.tables.edges.left
11761176
edge_right = ts.tables.edges.right
1177-
node_edges = tree._node_edges()
1177+
node_edges = tree.edge_array
11781178
# whittle mutations down so we only need look at those above the tree nodes
11791179
mut_t = ts.tables.mutations
11801180
focal_mutations = np.isin(mut_t.node, np.fromiter(nodes, mut_t.node.dtype))

python/tskit/trees.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -689,6 +689,7 @@ def _make_arrays(self):
689689
self._left_sib_array = self._ll_tree.left_sib_array
690690
self._right_sib_array = self._ll_tree.right_sib_array
691691
self._num_children_array = self._ll_tree.num_children_array
692+
self._edge_array = self._ll_tree.edge_array
692693

693694
@property
694695
def tree_sequence(self):
@@ -1199,6 +1200,35 @@ def num_children_array(self):
11991200
"""
12001201
return self._num_children_array
12011202

1203+
def edge(self, u):
1204+
"""
1205+
Returns the id of the edge encoding the relationship between ``u``
1206+
and its parent, or :data:`tskit.NULL` if ``u`` is a root, virtual root
1207+
or is not a node in the current tree.
1208+
1209+
:param int u: The node of interest.
1210+
:return: Id of edge connecting u to its parent.
1211+
:rtype: int
1212+
"""
1213+
return self._ll_tree.get_edge(u)
1214+
1215+
@property
1216+
def edge_array(self):
1217+
"""
1218+
A numpy array (dtype=np.int32) of edge ids encoding the relationship
1219+
between the child node ``u`` and its parent, such that
1220+
``tree.edge_array[u] == tree.edge(u)`` for all
1221+
``0 <= u <= ts.num_nodes``. See the :meth:`~.edge`
1222+
method for details on the semantics of tree edges and the
1223+
:ref:`sec_data_model_tree_structure` section for information on the
1224+
quintuply linked tree encoding.
1225+
1226+
.. include:: substitutions/virtual_root_array_note.rst
1227+
1228+
.. include:: substitutions/tree_array_warning.rst
1229+
"""
1230+
return self._edge_array
1231+
12021232
# Sample list.
12031233

12041234
def left_sample(self, u):

0 commit comments

Comments
 (0)