Skip to content

Commit 812af67

Browse files
authored
Merge pull request #2401 from benjeffery/variant_samples_segfault
Copy samples when copying a variant to fix segfault when copy.samples is accessed
2 parents dc45651 + e2ec4d0 commit 812af67

File tree

6 files changed

+66
-7
lines changed

6 files changed

+66
-7
lines changed

c/CHANGELOG.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
- Reduce the maximum number of rows in a table by 1. This removes edge cases so that a ``tsk_id_t`` can be
1818
used to count the number of rows. (:user:`benjeffery`, :issue:`2336`, :pr:`2337`)
1919

20+
- Samples are now copied by ``tsk_variant_restricted_copy``. (:user:`benjeffery`, :issue:`2400`, :pr:`XXXX`)
21+
22+
2023
--------------------
2124
[1.0.0] - 2022-05-24
2225
--------------------

c/tests/test_genotypes.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -979,14 +979,29 @@ test_variant_copy(void)
979979
ret = tsk_variant_decode(&var_copy, 0, 0);
980980
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_VARIANT_CANT_DECODE_COPY);
981981

982+
CU_ASSERT_EQUAL(
983+
0, memcmp(var.tree_sequence, var.tree_sequence, sizeof(*var.tree_sequence)));
984+
CU_ASSERT_EQUAL(0, memcmp(&var.tree, &var_copy.tree, sizeof(tsk_tree_t)));
982985
CU_ASSERT_EQUAL(0, memcmp(&var.site, &var_copy.site, sizeof(tsk_site_t)));
983-
CU_ASSERT_EQUAL(0, memcmp(var.genotypes, var_copy.genotypes, sizeof(int32_t)));
986+
CU_ASSERT_EQUAL(0, memcmp(var.genotypes, var_copy.genotypes,
987+
sizeof(int32_t) * (size_t) var.num_samples));
988+
CU_ASSERT_EQUAL(var.num_alleles, var_copy.num_alleles);
984989
CU_ASSERT_EQUAL(
985990
0, memcmp(var.allele_lengths, var_copy.allele_lengths, sizeof(tsk_size_t)));
986991
for (j = 0; j < var.num_alleles; j++) {
987992
CU_ASSERT_EQUAL(0,
988993
memcmp(var.alleles[j], var_copy.alleles[j], (size_t) var.allele_lengths[j]));
989994
}
995+
CU_ASSERT_EQUAL(var.has_missing_data, var_copy.has_missing_data);
996+
CU_ASSERT_EQUAL(var.num_alleles, var_copy.num_alleles);
997+
CU_ASSERT_EQUAL(var.num_samples, var_copy.num_samples);
998+
CU_ASSERT_EQUAL(0, memcmp(var.samples, var_copy.samples,
999+
sizeof(*var.samples) * (size_t) var.num_samples));
1000+
1001+
CU_ASSERT_EQUAL(var_copy.traversal_stack, NULL);
1002+
CU_ASSERT_EQUAL(var_copy.sample_index_map, NULL);
1003+
CU_ASSERT_EQUAL(var_copy.alt_samples, NULL);
1004+
CU_ASSERT_EQUAL(var_copy.alt_sample_index_map, NULL);
9901005

9911006
tsk_variant_free(&var);
9921007
tsk_variant_free(&var_copy);

c/tskit/genotypes.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -598,17 +598,20 @@ tsk_variant_restricted_copy(const tsk_variant_t *self, tsk_variant_t *other)
598598
for (j = 0; j < self->num_alleles; j++) {
599599
total_len += self->allele_lengths[j];
600600
}
601+
other->samples = tsk_malloc(other->num_samples * sizeof(tsk_id_t));
601602
other->genotypes = tsk_malloc(other->num_samples * sizeof(int32_t));
602603
other->user_alleles_mem = tsk_malloc(total_len * sizeof(char *));
603604
other->allele_lengths
604605
= tsk_malloc(other->num_alleles * sizeof(*other->allele_lengths));
605606
other->alleles = tsk_malloc(other->num_alleles * sizeof(*other->alleles));
606-
if (other->genotypes == NULL || other->user_alleles_mem == NULL
607-
|| other->allele_lengths == NULL || other->alleles == NULL) {
607+
if (other->samples == NULL || other->genotypes == NULL
608+
|| other->user_alleles_mem == NULL || other->allele_lengths == NULL
609+
|| other->alleles == NULL) {
608610
ret = TSK_ERR_NO_MEMORY;
609611
goto out;
610612
}
611-
613+
tsk_memcpy(
614+
other->samples, self->samples, other->num_samples * sizeof(*other->samples));
612615
tsk_memcpy(other->genotypes, self->genotypes, other->num_samples * sizeof(int32_t));
613616
tsk_memcpy(other->allele_lengths, self->allele_lengths,
614617
other->num_alleles * sizeof(*other->allele_lengths));
@@ -619,6 +622,7 @@ tsk_variant_restricted_copy(const tsk_variant_t *self, tsk_variant_t *other)
619622
other->alleles[j] = other->user_alleles_mem + offset;
620623
offset += other->allele_lengths[j];
621624
}
625+
622626
out:
623627
return ret;
624628
}

python/CHANGELOG.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22
[0.5.1] - 2022-0X-XX
33
--------------------
44

5+
**Fixes**
6+
7+
- Copies of a `Variant` object would cause a segfault when ``.samples`` was accessed.
8+
(:user:`benjeffery`, :issue:`2400`, :pr:`XXXX`)
9+
10+
511
**Changes**
612

713
- Tables in a table collection can be replaced using the replace_with method

python/tests/test_genotypes.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,8 @@ def test_samples(self):
359359
for var1, var2 in zip(ts.variants(), ts.variants(samples=s)):
360360
assert var1.site == var2.site
361361
assert var1.alleles == var2.alleles
362+
assert np.array_equal(var1.samples, ts.samples())
363+
assert np.array_equal(var2.samples, s)
362364
assert var2.genotypes.shape == (len(s),)
363365
assert np.array_equal(var1.genotypes[s], var2.genotypes)
364366
count += 1

python/tests/test_lowlevel.py

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2414,13 +2414,37 @@ def test_variants_lifecycle(self):
24142414
del variant
24152415
assert np.array_equal(genotypes, expected)
24162416

2417-
def test_copy(self):
2417+
@pytest.mark.parametrize("isolated_as_missing", [True, False])
2418+
@pytest.mark.parametrize("samples", [None, [], (0,)])
2419+
@pytest.mark.parametrize("alleles", [None, ("1", "0")])
2420+
def test_copy(self, isolated_as_missing, samples, alleles):
24182421
ts = self.get_example_tree_sequence(random_seed=42)
2419-
variant = _tskit.Variant(ts)
2422+
variant = _tskit.Variant(
2423+
ts,
2424+
isolated_as_missing=isolated_as_missing,
2425+
samples=samples,
2426+
alleles=alleles,
2427+
)
2428+
2429+
# Test taking a copy before decode
2430+
variant2 = variant.restricted_copy()
2431+
assert variant.site_id == variant2.site_id
2432+
assert variant.alleles == variant2.alleles
2433+
print(variant.alleles)
2434+
assert np.array_equal(variant.genotypes, variant2.genotypes)
2435+
assert np.array_equal(variant.samples, variant2.samples)
2436+
assert variant.isolated_as_missing == variant2.isolated_as_missing
2437+
24202438
variant.decode(0)
24212439
# Everything below should work even if the Python ts is free'd
24222440
del ts
24232441
variant2 = variant.restricted_copy()
2442+
assert variant.site_id == variant2.site_id
2443+
assert variant.alleles == variant2.alleles
2444+
assert np.array_equal(variant.genotypes, variant2.genotypes)
2445+
assert np.array_equal(variant.samples, variant2.samples)
2446+
assert variant.isolated_as_missing == variant2.isolated_as_missing
2447+
24242448
# Take a copy for comparison, then move the variant to check the copy
24252449
# doesn't move too
24262450
genotypes = variant.genotypes
@@ -2434,13 +2458,18 @@ def test_copy(self):
24342458
variant2.decode(1)
24352459
assert site_id == variant2.site_id
24362460
assert alleles == variant2.alleles
2461+
# Other properties shouldn't have changed
2462+
assert np.array_equal(variant.samples, variant2.samples)
2463+
assert variant.isolated_as_missing == variant2.isolated_as_missing
24372464

24382465
# Variant should be equal to the copy we took earlier
24392466
assert np.array_equal(genotypes_copy, variant2.genotypes)
24402467
# But not equal to the un-copies genotypes anymore as they
24412468
# have decoded a new site as a side effect of reusing the
24422469
# array when decoding
2443-
assert not np.array_equal(genotypes, variant2.genotypes)
2470+
assert len(variant.samples) == 0 or not np.array_equal(
2471+
genotypes, variant2.genotypes
2472+
)
24442473

24452474
# Check the lifecycle of copies and copies of copies
24462475
del variant

0 commit comments

Comments
 (0)