Skip to content

Commit 73e7332

Browse files
committed
TreeSequence.tables returns ImmutableTableCollection
1 parent e66da01 commit 73e7332

19 files changed

+2063
-450
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ python/benchmark/*.html
66
.venv
77
.env
88
.vscode
9-
9+
env

docs/python-api.md

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -64,20 +64,11 @@ The {class}`.TreeSequence` class provides access to underlying numerical
6464
data defined in the {ref}`data model<sec_data_model>` in two ways:
6565

6666
1. Via the {attr}`.TreeSequence.tables` property and the
67-
{ref}`Tables API<sec_tables_api_accessing_table_data>`
67+
{ref}`Tables API<sec_tables_api_accessing_table_data>`.
68+
Since version 1.0 this provides a direct, zero-copy, immutable view of the
69+
underlying memory.
6870
2. Via a set of properties on the ``TreeSequence`` class that provide
69-
direct and efficient access to the underlying memory.
70-
71-
:::{warning}
72-
Accessing table data via {attr}`.TreeSequence.tables` can be very inefficient
73-
at the moment because accessing the `.tables` property incurs a **full copy**
74-
of the data model. While we intend to implement this as a read-only view
75-
in the future, the engineering involved is nontrivial, and so we recommend
76-
using the properties listed here like ``ts.nodes_time`` in favour of
77-
``ts.tables.nodes.time``.
78-
Please see [issue #760](https://github.com/tskit-dev/tskit/issues/760)
79-
for more information.
80-
:::
71+
direct and efficient access to a single array in the underlying memory.
8172

8273

8374
```{eval-rst}

python/CHANGELOG.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44

55
**Breaking Changes**
66

7+
- `TreeSequence.tables` now returns a zero-copy immutable view of the tables.
8+
To get a mutable copy, use `TreeSequence.dump_tables()`.
9+
(:user:`benjeffery`, :pr:`3288`, :issue:`760`)
10+
711
- For a tree sequence to be valid, mutation parents in the table collection
812
must be correct and consistent with the topology of the tree at each mutation site.
913
``TableCollection.tree_sequence()`` will raise a ``_tskit.LibraryError`` if this

python/lwt_interface/dict_encoding_testlib.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def verify(self, tables):
132132

133133
def test_simple(self):
134134
ts = msprime.simulate(10, mutation_rate=1, random_seed=2)
135-
self.verify(ts.tables)
135+
self.verify(ts.dump_tables())
136136

137137
def test_empty(self):
138138
tables = tskit.TableCollection(sequence_length=1)
@@ -152,7 +152,7 @@ def test_sequence_length(self):
152152
ts = msprime.simulate(
153153
10, recombination_rate=0.1, mutation_rate=1, length=0.99, random_seed=2
154154
)
155-
self.verify(ts.tables)
155+
self.verify(ts.dump_tables())
156156

157157
def test_migration(self):
158158
pop_configs = [msprime.PopulationConfiguration(5) for _ in range(2)]
@@ -164,7 +164,7 @@ def test_migration(self):
164164
record_migrations=True,
165165
random_seed=1,
166166
)
167-
self.verify(ts.tables)
167+
self.verify(ts.dump_tables())
168168

169169
def test_example(self, tables):
170170
tables.metadata_schema = tskit.MetadataSchema(

python/lwt_interface/test_example_c_module.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def test_example_receiving():
3535
example_c_module.example_receiving(lwt)
3636

3737
# This tree sequence has one root so we get false
38-
tables = msprime.simulate(10).tables
38+
tables = msprime.simulate(10).dump_tables()
3939
lwt.fromdict(tables.asdict())
4040
assert not example_c_module.example_receiving(lwt)
4141

python/tests/test_genotypes.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -863,7 +863,7 @@ def test_nonbinary_trees(self):
863863
def test_acgt_mutations(self):
864864
ts = msprime.simulate(10, mutation_rate=10)
865865
assert ts.num_sites > 0
866-
tables = ts.tables
866+
tables = ts.dump_tables()
867867
sites = tables.sites
868868
mutations = tables.mutations
869869
sites.set_columns(
@@ -883,23 +883,23 @@ def test_acgt_mutations(self):
883883

884884
def test_fails_multiletter_mutations(self):
885885
ts = msprime.simulate(10, random_seed=2)
886-
tables = ts.tables
886+
tables = ts.dump_tables()
887887
tables.sites.add_row(0, "ACTG")
888888
tsp = tables.tree_sequence()
889889
with pytest.raises(TypeError):
890890
list(tsp.haplotypes())
891891

892892
def test_fails_deletion_mutations(self):
893893
ts = msprime.simulate(10, random_seed=2)
894-
tables = ts.tables
894+
tables = ts.dump_tables()
895895
tables.sites.add_row(0, "")
896896
tsp = tables.tree_sequence()
897897
with pytest.raises(TypeError):
898898
list(tsp.haplotypes())
899899

900900
def test_nonascii_mutations(self):
901901
ts = msprime.simulate(10, random_seed=2)
902-
tables = ts.tables
902+
tables = ts.dump_tables()
903903
tables.sites.add_row(0, chr(169)) # Copyright symbol
904904
tsp = tables.tree_sequence()
905905
with pytest.raises(TypeError):

python/tests/test_highlevel.py

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1292,7 +1292,7 @@ def verify_edgesets(self, ts):
12921292
tskit.Edge(edgeset.left, edgeset.right, edgeset.parent, child)
12931293
)
12941294
# squash the edges.
1295-
t = ts.dump_tables().nodes.time
1295+
t = ts.tables.nodes.time
12961296
new_edges.sort(key=lambda e: (t[e.parent], e.parent, e.child, e.left))
12971297

12981298
squashed = []
@@ -1916,24 +1916,20 @@ def test_load_tables(self, ts):
19161916
with pytest.raises(
19171917
_tskit.LibraryError, match="Table collection must be indexed"
19181918
):
1919-
assert tskit.TreeSequence.load_tables(tables).dump_tables().has_index()
1919+
assert tskit.TreeSequence.load_tables(tables).tables.has_index()
19201920

19211921
# Tables not in tc, but rebuilt
1922-
assert (
1923-
tskit.TreeSequence.load_tables(tables, build_indexes=True)
1924-
.dump_tables()
1925-
.has_index()
1926-
)
1922+
assert tskit.TreeSequence.load_tables(
1923+
tables, build_indexes=True
1924+
).tables.has_index()
19271925

19281926
tables.build_index()
19291927
# Tables in tc, not rebuilt
1930-
assert (
1931-
tskit.TreeSequence.load_tables(tables, build_indexes=False)
1932-
.dump_tables()
1933-
.has_index()
1934-
)
1928+
assert tskit.TreeSequence.load_tables(
1929+
tables, build_indexes=False
1930+
).tables.has_index()
19351931
# Tables in tc, and rebuilt
1936-
assert tskit.TreeSequence.load_tables(tables).dump_tables().has_index()
1932+
assert tskit.TreeSequence.load_tables(tables).tables.has_index()
19371933

19381934
@pytest.mark.parametrize("ts", tsutil.get_example_tree_sequences())
19391935
def test_html_repr(self, ts):
@@ -1958,14 +1954,14 @@ def test_bad_provenance(self, ts_fixture):
19581954
assert "Could not parse provenance" in ts._repr_html_()
19591955

19601956
def test_provenance_summary_html(self, ts_fixture):
1961-
tables = ts_fixture.tables
1957+
tables = ts_fixture.dump_tables()
19621958
for _ in range(20):
19631959
# Add a row with isotimestamp
19641960
tables.provenances.add_row("foo", "bar")
19651961
assert "... 15 more" in tables.tree_sequence()._repr_html_()
19661962

19671963
def test_html_repr_limit(self, ts_fixture):
1968-
tables = ts_fixture.tables
1964+
tables = ts_fixture.dump_tables()
19691965
d = {n: n for n in range(50)}
19701966
d[0] = "N" * 200
19711967
tables.metadata = d
@@ -2656,7 +2652,8 @@ def verify_tables_api_equality(self, ts):
26562652
tables = ts.dump_tables()
26572653
tables.simplify(samples=samples)
26582654
tables.assert_equals(
2659-
ts.simplify(samples=samples).tables, ignore_timestamps=True
2655+
ts.simplify(samples=samples).dump_tables(),
2656+
ignore_timestamps=True,
26602657
)
26612658

26622659
@pytest.mark.parametrize("ts", tsutil.get_example_tree_sequences())
@@ -4012,7 +4009,7 @@ def test_first_last(self):
40124009

40134010
def test_eq_different_tree_sequence(self):
40144011
ts = msprime.simulate(4, recombination_rate=1, length=2, random_seed=42)
4015-
copy = ts.tables.tree_sequence()
4012+
copy = ts.dump_tables().tree_sequence()
40164013
for tree1, tree2 in zip(ts.aslist(), copy.aslist()):
40174014
assert tree1 != tree2
40184015

@@ -5608,6 +5605,21 @@ def test_ragged_array_not_supported(self, column):
56085605
):
56095606
getattr(ts, column)
56105607

5608+
@pytest.mark.skipif(_tskit.HAS_NUMPY_2, reason="Test only on Numpy 1.X")
5609+
def test_tables_emits_warning(self):
5610+
tables = tskit.TableCollection(sequence_length=1)
5611+
ts = tables.tree_sequence()
5612+
5613+
with warnings.catch_warnings(record=True) as caught:
5614+
warnings.simplefilter("always", UserWarning)
5615+
result = ts.tables
5616+
5617+
assert isinstance(result, tskit.TableCollection)
5618+
assert len(caught) == 1
5619+
warning = caught[0]
5620+
assert warning.category is UserWarning
5621+
assert "Immutable table views require tskit" in str(warning.message)
5622+
56115623

56125624
class TestSampleNodesByPloidy:
56135625
@pytest.mark.parametrize(

python/tests/test_ibd.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ def test_within_between_mutually_exclusive(self, ts):
275275

276276
@pytest.mark.parametrize("ts", example_ts())
277277
def test_tables_interface(self, ts):
278-
ibd_tab = ts.tables.ibd_segments(store_segments=True)
278+
ibd_tab = ts.dump_tables().ibd_segments(store_segments=True)
279279
ibd_ts = ts.ibd_segments(store_segments=True)
280280
assert ibd_tab == ibd_ts
281281

0 commit comments

Comments
 (0)