diff --git a/genes/hgvs/hgvs_converter.py b/genes/hgvs/hgvs_converter.py index 7352ee08c..deccf663d 100644 --- a/genes/hgvs/hgvs_converter.py +++ b/genes/hgvs/hgvs_converter.py @@ -17,10 +17,17 @@ def is_internal_type(self) -> bool: class HgvsMatchRefAllele: - def __init__(self, provided_ref: str, calculated_ref: str): + def __init__(self, provided_ref: str, calculated_ref: str, ref_type=None, ref_source=None): self.provided_ref = provided_ref self.calculated_ref = calculated_ref + if ref_type is None: + ref_type = "genomic" + self.ref_type = ref_type + if ref_source is None: + ref_source = "our build" + self.ref_source = ref_source + def __bool__(self): if self.provided_ref: return self.provided_ref == self.calculated_ref @@ -40,6 +47,8 @@ def __eq__(self, other): return True return self.provided_ref == other.provided_ref and self.calculated_ref == other.calculated_ref + def get_message(self) -> str: + return f'Using {self.ref_type} reference "{self.calculated_ref}" from {self.ref_source}, in place of provided reference "{self.provided_ref}"' # We need a common Exception # Common HGVS Extra?? diff --git a/genes/hgvs/hgvs_matcher.py b/genes/hgvs/hgvs_matcher.py index fe69dc8a7..391692c02 100644 --- a/genes/hgvs/hgvs_matcher.py +++ b/genes/hgvs/hgvs_matcher.py @@ -145,7 +145,7 @@ def __init__(self, genome_build: GenomeBuild, hgvs_converter_type=None, local_resolution=local_resolution, clingen_resolution=clingen_resolution) - def _clingen_get_variant_coordinate(self, hgvs_string: str) -> VariantCoordinate: + def _clingen_get_variant_coordinate_and_matches_reference(self, hgvs_string: str, match_ref_allele=None) -> tuple[VariantCoordinate, bool]: cleaned_hgvs = self.hgvs_converter.c_hgvs_remove_gene_symbol(hgvs_string) try: @@ -158,7 +158,9 @@ def _clingen_get_variant_coordinate(self, hgvs_string: str) -> VariantCoordinate start=variant_coord.start, ref=variant_coord.ref, alt=variant_coord.ref) # ref == alt - return variant_coord + if match_ref_allele is None: + match_ref_allele = True + return variant_coord, match_ref_allele except ClinGenAlleleAPIException: self.attempt_clingen = False raise @@ -168,6 +170,23 @@ def _clingen_get_variant_coordinate(self, hgvs_string: str) -> VariantCoordinate transcript_accession = self.hgvs_converter.get_transcript_accession(hgvs_string) self._set_clingen_allele_registry_missing_transcript(transcript_accession) else: + if settings.CLINGEN_ALLELE_REGISTRY_REATTEMPT_WITH_ACTUAL_REF and match_ref_allele is None: + # Don't do if already swapped (stop infinite recursion) + rjson = cga_se.response_json + if rjson["errorType"] == 'IncorrectReferenceAllele': + actual_allele = rjson['actualAllele'] + given_allele = rjson['givenAllele'] + transcript_reference_sequence = rjson["referenceSequence"] + hgvs_variant = self.create_hgvs_variant(cleaned_hgvs) + if hgvs_variant.ref_allele == given_allele: + hgvs_variant.ref_allele = actual_allele + hgvs_swapped_ref = hgvs_variant.format() + match_ref_allele = HgvsMatchRefAllele(provided_ref=given_allele, + calculated_ref=actual_allele, + ref_type=f"transcript {transcript_reference_sequence}", + ref_source="ClinGen Allele Registry") + return self._clingen_get_variant_coordinate_and_matches_reference(hgvs_swapped_ref, match_ref_allele=match_ref_allele) + self.attempt_clingen = False raise @@ -194,10 +213,9 @@ def _lrg_get_variant_coordinate_used_transcript_method_and_matches_reference(sel hgvs_string = hgvs_variant.format() try: - # ClinGen fails if reference base is different so matches_reference is always True - matches_reference = True method = HGVSConverterType.CLINGEN_ALLELE_REGISTRY.name - return self._clingen_get_variant_coordinate(hgvs_string), lrg_transcript_accession, HGVSConverterType.CLINGEN_ALLELE_REGISTRY, method, matches_reference + variant_coordinate, matches_reference = self._clingen_get_variant_coordinate_and_matches_reference(hgvs_string) + return variant_coordinate, lrg_transcript_accession, HGVSConverterType.CLINGEN_ALLELE_REGISTRY, method, matches_reference except ClinGenAllele.ClinGenAlleleRegistryException as cga_re: raise ValueError(f"Could not retrieve {hgvs_string} from ClinGen Allele Registry") from cga_re @@ -356,8 +374,7 @@ def get_variant_coordinate_used_transcript_kind_method_and_matches_reference(sel if self._clingen_allele_registry_ok(tv.accession): error_message = f"Could not convert \"{hgvs_string}\" using ClinGenAllele Registry" try: - matches_reference = True # ClnGen fails if different - variant_coordinate = self._clingen_get_variant_coordinate(hgvs_string_for_version) + variant_coordinate, matches_reference = self._clingen_get_variant_coordinate_and_matches_reference(hgvs_string_for_version) except ClinGenAlleleServerException as cga_se: # If it's unknown reference we can just retry with another version, other errors are fatal if cga_se.is_unknown_reference(): diff --git a/snpdb/signals/variant_search.py b/snpdb/signals/variant_search.py index e808fce30..907237c6e 100644 --- a/snpdb/signals/variant_search.py +++ b/snpdb/signals/variant_search.py @@ -537,7 +537,8 @@ def _search_hgvs(hgvs_string: str, user: User, genome_build: GenomeBuild, visibl # reporting on the "provided" reference is slightly promblematic as it's not always provided directly, it could be indirectly if isinstance(matches_reference, HgvsMatchRefAllele) and matches_reference.provided_ref: - search_messages.append(SearchMessage(f'Using genomic reference "{matches_reference.calculated_ref}" from our build, in place of provided reference "{matches_reference.provided_ref}"', LogLevel.ERROR, substituted=True)) + msg = matches_reference.get_message() + search_messages.append(SearchMessage(msg, LogLevel.ERROR, substituted=True)) else: # if no reference was provided, do we even need to provide a message? # e.g. this is providing a ref for when we have a delins, e.g. delinsGT => delCCinsGT diff --git a/variantgrid/settings/components/default_settings.py b/variantgrid/settings/components/default_settings.py index e531410a8..bd839a2b0 100644 --- a/variantgrid/settings/components/default_settings.py +++ b/variantgrid/settings/components/default_settings.py @@ -374,6 +374,7 @@ CLINGEN_ALLELE_REGISTRY_PASSWORD = get_secret("CLINGEN_ALLELE_REGISTRY.password") CLINGEN_ALLELE_REGISTRY_MAX_MANUAL_REQUESTS = 10_000 # On nodes and VCFs CLINGEN_ALLELE_REGISTRY_REQUIRE_REF_ALLELE = True +CLINGEN_ALLELE_REGISTRY_REATTEMPT_WITH_ACTUAL_REF = True NO_DNA_CONTROL_REGEX = "(^|[^a-zA-Z])NDC([^a-zA-Z]|$)" # No DNA Control - e.g. _NDC_ or -NDC_