Skip to content

Commit

Permalink
issue #1137 - clingen swap reference base
Browse files Browse the repository at this point in the history
  • Loading branch information
davmlaw committed Aug 13, 2024
1 parent 3214b67 commit b4b36f0
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 9 deletions.
11 changes: 10 additions & 1 deletion genes/hgvs/hgvs_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,17 @@ def is_internal_type(self) -> bool:


class HgvsMatchRefAllele:
def __init__(self, provided_ref: str, calculated_ref: str):
def __init__(self, provided_ref: str, calculated_ref: str, ref_type=None, ref_source=None):
# Stores the provided and calculated reference values unchanged.
self.provided_ref = provided_ref
self.calculated_ref = calculated_ref

# ref_type and ref_source are optional labels; when the caller passes None they
# fall back to "genomic" and "our build" respectively. get_message() interpolates
# both into the text it returns.
if ref_type is None:
ref_type = "genomic"
self.ref_type = ref_type
if ref_source is None:
ref_source = "our build"
self.ref_source = ref_source

def __bool__(self):
if self.provided_ref:
return self.provided_ref == self.calculated_ref
Expand All @@ -40,6 +47,8 @@ def __eq__(self, other):
return True
return self.provided_ref == other.provided_ref and self.calculated_ref == other.calculated_ref

def get_message(self) -> str:
# Returns a one-line notice stating that calculated_ref (described by ref_type and
# ref_source) is being used in place of provided_ref.
return f'Using {self.ref_type} reference "{self.calculated_ref}" from {self.ref_source}, in place of provided reference "{self.provided_ref}"'

# We need a common Exception
# Common HGVS Extra??
Expand Down
31 changes: 24 additions & 7 deletions genes/hgvs/hgvs_matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def __init__(self, genome_build: GenomeBuild, hgvs_converter_type=None,
local_resolution=local_resolution,
clingen_resolution=clingen_resolution)

def _clingen_get_variant_coordinate(self, hgvs_string: str) -> VariantCoordinate:
def _clingen_get_variant_coordinate_and_matches_reference(self, hgvs_string: str, match_ref_allele=None) -> tuple[VariantCoordinate, bool]:
cleaned_hgvs = self.hgvs_converter.c_hgvs_remove_gene_symbol(hgvs_string)

try:
Expand All @@ -158,7 +158,9 @@ def _clingen_get_variant_coordinate(self, hgvs_string: str) -> VariantCoordinate
start=variant_coord.start,
ref=variant_coord.ref,
alt=variant_coord.ref) # ref == alt
return variant_coord
if match_ref_allele is None:
match_ref_allele = True
return variant_coord, match_ref_allele
except ClinGenAlleleAPIException:
self.attempt_clingen = False
raise
Expand All @@ -168,6 +170,23 @@ def _clingen_get_variant_coordinate(self, hgvs_string: str) -> VariantCoordinate
transcript_accession = self.hgvs_converter.get_transcript_accession(hgvs_string)
self._set_clingen_allele_registry_missing_transcript(transcript_accession)
else:
if settings.CLINGEN_ALLELE_REGISTRY_REATTEMPT_WITH_ACTUAL_REF and match_ref_allele is None:
# Don't retry if the reference was already swapped (prevents infinite recursion)
rjson = cga_se.response_json
if rjson["errorType"] == 'IncorrectReferenceAllele':
actual_allele = rjson['actualAllele']
given_allele = rjson['givenAllele']
transcript_reference_sequence = rjson["referenceSequence"]
hgvs_variant = self.create_hgvs_variant(cleaned_hgvs)
if hgvs_variant.ref_allele == given_allele:
hgvs_variant.ref_allele = actual_allele
hgvs_swapped_ref = hgvs_variant.format()
match_ref_allele = HgvsMatchRefAllele(provided_ref=given_allele,
calculated_ref=actual_allele,
ref_type=f"transcript {transcript_reference_sequence}",
ref_source="ClinGen Allele Registry")
return self._clingen_get_variant_coordinate_and_matches_reference(hgvs_swapped_ref, match_ref_allele=match_ref_allele)

self.attempt_clingen = False
raise

Expand All @@ -194,10 +213,9 @@ def _lrg_get_variant_coordinate_used_transcript_method_and_matches_reference(sel

hgvs_string = hgvs_variant.format()
try:
# ClinGen fails if reference base is different so matches_reference is always True
matches_reference = True
method = HGVSConverterType.CLINGEN_ALLELE_REGISTRY.name
return self._clingen_get_variant_coordinate(hgvs_string), lrg_transcript_accession, HGVSConverterType.CLINGEN_ALLELE_REGISTRY, method, matches_reference
variant_coordinate, matches_reference = self._clingen_get_variant_coordinate_and_matches_reference(hgvs_string)
return variant_coordinate, lrg_transcript_accession, HGVSConverterType.CLINGEN_ALLELE_REGISTRY, method, matches_reference
except ClinGenAllele.ClinGenAlleleRegistryException as cga_re:
raise ValueError(f"Could not retrieve {hgvs_string} from ClinGen Allele Registry") from cga_re

Expand Down Expand Up @@ -356,8 +374,7 @@ def get_variant_coordinate_used_transcript_kind_method_and_matches_reference(sel
if self._clingen_allele_registry_ok(tv.accession):
error_message = f"Could not convert \"{hgvs_string}\" using ClinGenAllele Registry"
try:
matches_reference = True # ClnGen fails if different
variant_coordinate = self._clingen_get_variant_coordinate(hgvs_string_for_version)
variant_coordinate, matches_reference = self._clingen_get_variant_coordinate_and_matches_reference(hgvs_string_for_version)
except ClinGenAlleleServerException as cga_se:
# If it's unknown reference we can just retry with another version, other errors are fatal
if cga_se.is_unknown_reference():
Expand Down
3 changes: 2 additions & 1 deletion snpdb/signals/variant_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,8 @@ def _search_hgvs(hgvs_string: str, user: User, genome_build: GenomeBuild, visibl
# reporting on the "provided" reference is slightly problematic as it's not always provided directly; it could be provided indirectly

if isinstance(matches_reference, HgvsMatchRefAllele) and matches_reference.provided_ref:
search_messages.append(SearchMessage(f'Using genomic reference "{matches_reference.calculated_ref}" from our build, in place of provided reference "{matches_reference.provided_ref}"', LogLevel.ERROR, substituted=True))
msg = matches_reference.get_message()
search_messages.append(SearchMessage(msg, LogLevel.ERROR, substituted=True))
else:
# if no reference was provided, do we even need to provide a message?
# e.g. this is providing a ref for when we have a delins, e.g. delinsGT => delCCinsGT
Expand Down
1 change: 1 addition & 0 deletions variantgrid/settings/components/default_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,7 @@
CLINGEN_ALLELE_REGISTRY_PASSWORD = get_secret("CLINGEN_ALLELE_REGISTRY.password")
CLINGEN_ALLELE_REGISTRY_MAX_MANUAL_REQUESTS = 10_000 # On nodes and VCFs
CLINGEN_ALLELE_REGISTRY_REQUIRE_REF_ALLELE = True
CLINGEN_ALLELE_REGISTRY_REATTEMPT_WITH_ACTUAL_REF = True

NO_DNA_CONTROL_REGEX = "(^|[^a-zA-Z])NDC([^a-zA-Z]|$)" # No DNA Control - e.g. _NDC_ or -NDC_

Expand Down

0 comments on commit b4b36f0

Please sign in to comment.