Skip to content

Commit

Permalink
issue #1113 - Simplify HGVS errors in search
Browse files Browse the repository at this point in the history
  • Loading branch information
davmlaw committed Aug 14, 2024
1 parent 5cdbe9b commit 6cf4813
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 29 deletions.
48 changes: 42 additions & 6 deletions genes/hgvs/biocommons_hgvs/hgvs_converter_biocommons.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@
from django.conf import settings
from hgvs.assemblymapper import AssemblyMapper
from hgvs.edit import NARefAlt
from hgvs.exceptions import HGVSDataNotAvailableError, HGVSError, HGVSInvalidVariantError
from hgvs.exceptions import HGVSDataNotAvailableError, HGVSError, HGVSInvalidVariantError, HGVSInvalidIntervalError, \
HGVSNormalizationError, HGVSParseError, HGVSUnsupportedOperationError, HGVSInternalError, HGVSUsageError, \
HGVSVerifyFailedError
from hgvs.extras.babelfish import Babelfish
from hgvs.normalizer import Normalizer
from hgvs.parser import Parser
from hgvs.sequencevariant import SequenceVariant
from hgvs.validator import ExtrinsicValidator
from hgvs.variantmapper import VariantMapper

from genes.hgvs import HGVSVariant, HGVSException
from genes.hgvs import HGVSVariant, HGVSException, HGVSNomenclatureException, HGVSImplementationException
from genes.hgvs.biocommons_hgvs.data_provider import DjangoTranscriptDataProvider
from genes.hgvs.hgvs_converter import HGVSConverter, HgvsMatchRefAllele, HGVSConverterType
from genes.models import TranscriptVersion
Expand Down Expand Up @@ -153,7 +155,7 @@ def _parser_hgvs(hgvs_string: str) -> SequenceVariant:
else:
coord_span = abs(sequence_variant.posedit.length_change())
if coord_span != provided_span_length:
raise HGVSException(f"coordinate span ({coord_span}) not equal to provided ref length {provided_span_length}")
raise HGVSNomenclatureException(f"coordinate span ({coord_span}) not equal to provided ref length {provided_span_length}")
return sequence_variant

def create_hgvs_variant(self, hgvs_string: str) -> HGVSVariant:
Expand All @@ -162,7 +164,7 @@ def create_hgvs_variant(self, hgvs_string: str) -> HGVSVariant:
sequence_variant = self._parser_hgvs(hgvs_string)
return BioCommonsHGVSVariant(sequence_variant)
except HGVSError as e:
raise HGVSException from e
raise HGVSNomenclatureException from e

def _variant_coordinate_to_sequence_variant(self, vc: VariantCoordinate) -> SequenceVariant:
chrom, position, ref, alt, _svlen = vc.as_external_explicit(self.genome_build)
Expand All @@ -173,7 +175,7 @@ def _variant_coordinate_to_g_hgvs(self, vc: VariantCoordinate) -> HGVSVariant:
return BioCommonsHGVSVariant(var_g)

def variant_coordinate_to_c_hgvs(self, vc: VariantCoordinate, transcript_version) -> HGVSVariant:
""" In VG we call non-coding "c.HGVS" as well - so hanve to handle that """
""" In VG we call non-coding "c.HGVS" as well - so have to handle that """
try:
var_g = self._variant_coordinate_to_sequence_variant(vc) # returns normalized (default HGVS 3')
# Biocommons HGVS doesn't normalize introns as it works with transcript sequences so doesn't have introns
Expand All @@ -186,7 +188,8 @@ def variant_coordinate_to_c_hgvs(self, vc: VariantCoordinate, transcript_version
else:
var_c = self.am.g_to_n(var_g, transcript_version.accession)
except HGVSError as e: # Can be out of bounds etc
raise HGVSException(e) from e
klass = self._get_exception_class(e)
raise klass(e) from e

if gene_symbol := transcript_version.gene_symbol:
var_c.gene = gene_symbol.symbol
Expand Down Expand Up @@ -305,3 +308,36 @@ def _hgvs_to_g_hgvs(self, hgvs_string: str) -> tuple[SequenceVariant, HgvsMatchR
ref = var_g.posedit.edit.ref
matches_reference = HgvsMatchRefAllele(provided_ref='', calculated_ref=ref)
return var_g, matches_reference


@staticmethod
def _get_exception_class(hgvs_error: HGVSError) -> type:
""" Convert from HGVS to our generic errors """

exception_mappings = {
HGVSNomenclatureException: {
HGVSInvalidIntervalError,
HGVSInvalidVariantError,
HGVSNormalizationError,
HGVSParseError,
HGVSUnsupportedOperationError
},
HGVSImplementationException: {
HGVSDataNotAvailableError,
HGVSInternalError,
HGVSUsageError,
HGVSVerifyFailedError,
},
}

for our_ex, biocommons_hgvs_exceptions in exception_mappings.items():
for hgvs_ex in biocommons_hgvs_exceptions:
if isinstance(hgvs_error, hgvs_ex):
return our_ex
return HGVSException # General one...






11 changes: 11 additions & 0 deletions genes/hgvs/hgvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,17 @@ class HGVSException(Exception):
pass


class HGVSNomenclatureException(HGVSException):
""" HGVSException subclass for when problem is with HGVS string (users can fix) """
pass


class HGVSImplementationException(HGVSException):
""" HGVSException subclass for when problem is with the library (users can NOT fix) """
pass



class HGVSVariant(abc.ABC):
""" This class wraps pyhgvs HGVSName and BioCommons SequenceVariant functionality,
to allow library independent code """
Expand Down
14 changes: 7 additions & 7 deletions genes/hgvs/hgvs_matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from django.core.cache import cache
from django.db.models import Max, Min

from genes.hgvs import HGVSVariant, CHGVS
from genes.hgvs import HGVSVariant, CHGVS, HGVSImplementationException, HGVSNomenclatureException
from genes.hgvs.biocommons_hgvs.hgvs_converter_biocommons import BioCommonsHGVSConverter
from genes.hgvs.hgvs_converter import HGVSConverterType, HgvsMatchRefAllele, HGVSConverter
from genes.hgvs.hgvs_converter_combo import ComboCheckerHGVSConverter
Expand Down Expand Up @@ -96,7 +96,7 @@ def factory(genome_build: GenomeBuild, hgvs_converter_type: Optional[HGVSConvert
elif hgvs_converter_type == HGVSConverterType.CLINGEN_ALLELE_REGISTRY:
return ClinGenHGVSConverter(genome_build)

raise ValueError(f"HGVSConverter type {hgvs_converter_type} not supported")
raise HGVSImplementationException(f"HGVSConverter type {hgvs_converter_type} not supported")


class VariantResolvingError(ValueError):
Expand Down Expand Up @@ -217,7 +217,7 @@ def _lrg_get_variant_coordinate_used_transcript_method_and_matches_reference(sel
variant_coordinate, matches_reference = self._clingen_get_variant_coordinate_and_matches_reference(hgvs_string)
return variant_coordinate, lrg_transcript_accession, HGVSConverterType.CLINGEN_ALLELE_REGISTRY, method, matches_reference
except ClinGenAllele.ClinGenAlleleRegistryException as cga_re:
raise ValueError(f"Could not retrieve {hgvs_string} from ClinGen Allele Registry") from cga_re
raise HGVSImplementationException(f"Could not retrieve {hgvs_string} from ClinGen Allele Registry") from cga_re

@staticmethod
def _get_clingen_allele_registry_key(transcript_accession: str) -> str:
Expand Down Expand Up @@ -287,14 +287,14 @@ def filter_best_transcripts_and_converter_type_by_accession(self, transcript_acc
try:
transcript_id, version = TranscriptVersion.get_transcript_id_and_version(transcript_accession)
except ValueError:
raise ValueError(f"Error parsing transcript version from \"{transcript_accession}\"")
raise HGVSNomenclatureException(f"Error parsing transcript version from \"{transcript_accession}\"")

tv_qs = TranscriptVersion.objects.filter(genome_build=self.genome_build, transcript_id=transcript_id)
if not settings.VARIANT_TRANSCRIPT_VERSION_BEST_ATTEMPT:
if version:
return tv_qs.get(version=version)
else:
raise ValueError("Transcript version must be provided") # if settings.VARIANT_TRANSCRIPT_VERSION_BEST_ATTEMPT=False
raise HGVSNomenclatureException("Transcript version must be provided") # if settings.VARIANT_TRANSCRIPT_VERSION_BEST_ATTEMPT=False

tv_by_version = {tv.version: tv for tv in tv_qs}
if not tv_by_version:
Expand Down Expand Up @@ -452,7 +452,7 @@ def _lrg_variant_coordinate_to_hgvs(self, variant_coordinate: VariantCoordinate,
problems.append(f"{ca} didn't contain HGVS for '{lrg_identifier}'")

problem_str = ", ".join(problems)
raise ValueError(f"Could not convert {variant_coordinate} to HGVS using '{lrg_identifier}': {problem_str}")
raise HGVSImplementationException(f"Could not convert {variant_coordinate} to HGVS using '{lrg_identifier}': {problem_str}")

def variant_coordinate_to_hgvs_used_converter_type_and_method(self, variant_coordinate: VariantCoordinate,
transcript_accession: str = None) -> tuple[HGVSVariant, HGVSConverterType, str]:
Expand Down Expand Up @@ -519,7 +519,7 @@ def variant_coordinate_to_hgvs_used_converter_type_and_method(self, variant_coor
method += ": " + errors
method_and_errors.append(method)
attempts = ", ".join(method_and_errors)
raise ValueError(f"Could not convert {variant_coordinate} to HGVS - tried: {attempts}")
raise HGVSImplementationException(f"Could not convert {variant_coordinate} to HGVS - tried: {attempts}")
else:
# No methods tried, mustn't have had any transcripts
TranscriptVersion.raise_bad_or_missing_transcript(transcript_accession)
Expand Down
42 changes: 26 additions & 16 deletions snpdb/signals/variant_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
from annotation.cosmic import CosmicAPI
from annotation.manual_variant_entry import check_can_create_variants, CreateManualVariantForbidden
from classification.models import Classification, CreateNoClassificationForbidden
from genes.hgvs import HGVSMatcher, HGVSException, VariantResolvingError
from genes.hgvs import HGVSMatcher, HGVSException, VariantResolvingError, HGVSImplementationException, \
HGVSNomenclatureException
from genes.hgvs.hgvs_converter import HgvsMatchRefAllele
from genes.models import MissingTranscript, MANE, TranscriptVersion
from genes.models_enums import AnnotationConsortium
Expand Down Expand Up @@ -410,13 +411,7 @@ def _search_hgvs_using_gene_symbol(
# result is a ClassifyVariantHgvs or similar, yield it and only care about real variants for the rest
yield result
except Exception as e:
if settings.SEARCH_HGVS_GENE_SYMBOL_REPORT_FAILURES:
# TODO: Should we do the "simplify for non-admin users" here?
message = f"Could not resolve HGVS for transcript {transcript_version.accession}: {e}"
yield SearchResult.error_result(message, genome_build)
else:
# Add, and only report if no results found
transcript_accessions_by_exception[str(e)].append(tv_message)
transcript_accessions_by_exception[str(e)].append(tv_message)

have_results = False
for variant_identifier, results_for_record in results_by_variant_identifier.items():
Expand Down Expand Up @@ -444,15 +439,19 @@ def _search_hgvs_using_gene_symbol(
preview.annotation_consortia = unique_annotation_consortia
yield SearchResult(preview=preview, messages=search_messages)

if not have_results:
# If we have a resolution error, throw it here

if transcript_accessions_by_exception:
resolution_errors = []
for exception_msg, tv_message_list in transcript_accessions_by_exception.items():
resolution_errors.append(f"Error resolving transcripts: {', '.join(tv_message_list)}: {exception_msg}")

if resolution_errors:
raise VariantResolvingError("\n".join(resolution_errors))
resolution_msg = "\n".join(resolution_errors)
if settings.SEARCH_HGVS_GENE_SYMBOL_REPORT_FAILURES:
yield SearchResult.error_result(resolution_msg, genome_build)
elif not have_results:
raise VariantResolvingError(resolution_msg)

if not have_results:
# In some special cases, add in special messages for no result
messages_as_strs = [str(message) for message in search_messages]
if settings.SEARCH_HGVS_GENE_SYMBOL_USE_MANE and not settings.SEARCH_HGVS_GENE_SYMBOL_USE_ALL_TRANSCRIPTS:
Expand Down Expand Up @@ -496,17 +495,28 @@ def search_hgvs(search_input: SearchInputInstance) -> Iterable[SearchResult]:
return [INVALID_INPUT]
for_all_genome_builds = []
for genome_build in search_input.genome_builds:
results = []
error_message = None
try:
results =_search_hgvs(hgvs_string=search_input.search_string, user=search_input.user,
genome_build=genome_build,
visible_variants=search_input.get_visible_variants(genome_build),
classify=search_input.classify)
results = list(results) # read iterator to trigger exceptions
except Exception as e:
except HGVSNomenclatureException as e:
# We want to return errors with the HGVS but not show implementation details...
message = str(e)
results = [SearchResult.error_result(message, genome_build)]
for_all_genome_builds.append(results)
error_message = str(e)
except (Exception, HGVSImplementationException) as e:
if search_input.user.is_superuser:
error_message = str(e)
else:
error_message = "Error resolving HGVS"

if error_message:
results = [SearchResult.error_result(error_message, genome_build)]
if results:
for_all_genome_builds.append(results)

return itertools.chain(*for_all_genome_builds)


Expand Down

0 comments on commit 6cf4813

Please sign in to comment.