Skip to content

Commit

Permalink
Handle missing parent B in non-CP populations
Browse files Browse the repository at this point in the history
  • Loading branch information
tomkurowski committed Oct 25, 2022
1 parent 43bbc34 commit d294984
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 9 deletions.
34 changes: 26 additions & 8 deletions src/site_to_marker.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Functions for generating a JmMarker object based on a VariantSite object.
"""

from typing import Dict, List
from typing import Dict, List, Union

from src.variantsite import VariantSite
from src.jmmarker import JmMarker
Expand All @@ -20,10 +20,14 @@ def is_potential_marker(
To serve as a useful marker, each of the parents needs to have one of a
set of known genotype calls at the site.
For the F2 and RIx population types, if both parents have a genotype call at
the site, both calls have to be homozygous for different alleles. If only
parent A has a genotype call, it has to be homozygous and at least one
genotype call in the population has to be different from the parent A call.
For the CP population type, at least one of the parents must have
a heterozygous genotype, while the other can have either a homozygous or
a heterozygous genotype. For other population types, both parents must have
homozygous genotypes for different alleles.
a heterozygous genotype.
Args:
site:
Expand All @@ -36,13 +40,27 @@ def is_potential_marker(
String name of second parent.
"""
parent_a_gt = site.genotype_calls[parent_a]['GT']
parent_b_gt = site.genotype_calls[parent_b]['GT']
if population_type == 'CP':
parent_b_gt = site.genotype_calls[parent_b]['GT']
return (
(parent_a_gt in _HET_GENOTYPES and parent_b_gt in _HET_GENOTYPES)
or (parent_a_gt in _HOM_GENOTYPES and parent_b_gt in _HET_GENOTYPES)
or (parent_a_gt in _HET_GENOTYPES and parent_b_gt in _HOM_GENOTYPES)
)
# If parent B was not provided, check if A is homozygous
# and not identical to all other known samples.
if parent_b is None:
if parent_a_gt not in _HOM_GENOTYPES:
return False
gt_identical_to_parent_a = [
site.genotype_calls[sample_name]['GT'] in { parent_a_gt, './.' }
for sample_name in site.genotype_calls.keys()
]
if all(gt_identical_to_parent_a):
return False
return True
# Both parents were provided, check if they are homozygous and different
parent_b_gt = site.genotype_calls[parent_b]['GT']
return (
parent_a_gt != parent_b_gt
and parent_a_gt in _HOM_GENOTYPES
Expand Down Expand Up @@ -143,14 +161,14 @@ def _get_segregation_type(genotype_parent_a: str, genotype_parent_b: str):
def _convert_genotypes(
genotypes: Dict[str, str],
parent_a: str,
parent_b: str,
parent_b: Union[str, None],
sample_names: List[str]
):
return {
sample_name: _convert_genotype(
genotypes[sample_name],
genotypes[parent_a],
genotypes[parent_b]
None if parent_b is None else genotypes[parent_b]
)
for sample_name in sample_names
if sample_name not in [parent_a, parent_b]
Expand All @@ -160,13 +178,13 @@ def _convert_genotypes(
def _convert_genotype(
genotype: str,
parent_a_genotype: str,
parent_b_genotype: str
parent_b_genotype: Union[str, None]
):
genotype_code = '-'
if genotype in _HOM_GENOTYPES:
if genotype == parent_a_genotype:
genotype_code = 'a'
elif genotype == parent_b_genotype:
elif genotype == parent_b_genotype or parent_b_genotype is None:
genotype_code = 'b'
elif genotype in _HET_GENOTYPES:
genotype_code = 'h'
Expand Down
3 changes: 2 additions & 1 deletion vcf2loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@
child_dp = (
site.depth
- site.genotype_calls[args.parent_a]['DP']
- site.genotype_calls[args.parent_b]['DP']
- 0 if args.parent_b is None
else site.genotype_calls[args.parent_a]['DP']
)
if child_dp < args.min_dp:
# Skipping sites with low depth.
Expand Down

0 comments on commit d294984

Please sign in to comment.