Skip to content

Commit

Permalink
Merge pull request #348 from hyanwong/fix-provenance
Browse files Browse the repository at this point in the history
Fix provenance
  • Loading branch information
hyanwong authored Dec 9, 2023
2 parents de9b00e + 41dfa3a commit cbd3d68
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 3 deletions.
5 changes: 4 additions & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,10 @@ for a tree sequence of 5 million edges covering


:::{todo}
Add some scaling plots.
Add some scaling plots. Some real-world timings: dating a dataset of 10K samples and half a
million sites (~4M edges) on one node of a
2023 Intel Platinum cluster takes ~30 mins (20GB max memory) with the `inside_outside`
method and ~10 mins (1.5GB max memory) with the `variational_gamma` method.
:::

The running time of the dating algorithm is linear in the number of edges in the tree sequence.
Expand Down
9 changes: 8 additions & 1 deletion tests/test_provenance.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ def test_bad_get_dict(self):

def test_date_cmd_recorded(self):
ts = utility_functions.single_tree_ts_n2()
num_provenances = ts.num_provenances
dated_ts = tsdate.date(ts, population_size=1, mutation_rate=None)
assert dated_ts.num_provenances == num_provenances + 1
rec = json.loads(dated_ts.provenance(-1).record)
assert rec["software"]["name"] == "tsdate"
assert rec["parameters"]["command"] == "date"
Expand Down Expand Up @@ -83,26 +85,31 @@ def test_date_popsizehist_recorded(self, popdict):

def test_preprocess_cmd_recorded(self):
    """
    Preprocessing should append exactly one provenance entry, attributed
    to tsdate and naming the ``preprocess_ts`` command.
    """
    original = utility_functions.ts_w_data_desert(40, 60, 100)
    # Capture the count up front so the check is relative to the input,
    # whatever provenance the fixture already carries.
    count_before = original.num_provenances
    result = tsdate.preprocess_ts(original)
    assert result.num_provenances == count_before + 1
    # The entry of interest is the most recent one.
    latest = json.loads(result.provenance(-1).record)
    assert latest["software"]["name"] == "tsdate"
    assert latest["parameters"]["command"] == "preprocess_ts"

def test_preprocess_defaults_recorded(self):
    """
    Calling ``preprocess_ts`` without extra arguments should add a single
    provenance entry whose parameters record the default settings.
    """
    original = utility_functions.ts_w_data_desert(40, 60, 100)
    count_before = original.num_provenances
    result = tsdate.preprocess_ts(original)
    assert result.num_provenances == count_before + 1
    # Only the "parameters" section of the newest record is inspected here.
    params = json.loads(result.provenance(-1).record)["parameters"]
    assert params["remove_telomeres"]
    assert params["minimum_gap"] == 1000000
    assert params["delete_intervals"] == []

def test_preprocess_interval_recorded(self):
ts = utility_functions.ts_w_data_desert(40, 60, 100)
num_provenances = ts.num_provenances
preprocessed_ts = tsdate.preprocess_ts(
ts, minimum_gap=20, remove_telomeres=False
)
assert preprocessed_ts.num_provenances == num_provenances + 1
rec = json.loads(preprocessed_ts.provenance(-1).record)
print(rec)
assert rec["parameters"]["minimum_gap"] == 20
assert rec["parameters"]["remove_telomeres"] is not None
assert not rec["parameters"]["remove_telomeres"]
Expand Down
4 changes: 3 additions & 1 deletion tsdate/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,9 @@ def preprocess_ts(
delete_intervals.append([gap_start, gap_end])
delete_intervals = sorted(delete_intervals, key=lambda x: x[0])
if len(delete_intervals) > 0:
tables.delete_intervals(delete_intervals, simplify=False)
tables.delete_intervals(
delete_intervals, simplify=False, record_provenance=False
)
tables.simplify(
filter_populations=filter_populations,
filter_individuals=filter_individuals,
Expand Down

0 comments on commit cbd3d68

Please sign in to comment.