Skip to content

Commit

Permalink
Merge pull request #855 from SACGF/#3485_stop_MedGen_termm_trigger_hgvs
Browse files Browse the repository at this point in the history
#3485 Handle search error for MedGen and provide an external link.
  • Loading branch information
TheMadBug authored Aug 4, 2023
2 parents 736fe85 + 37f7cb6 commit a7d304a
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 12 deletions.
30 changes: 23 additions & 7 deletions ontology/models/models_ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,16 @@ class OntologyService(models.TextChoices):

DOID = "DOID", "DOID"
ORPHANET = "Orphanet", "Orphanet"
MEDGEN = "MedGen", "MedGen"

EXPECTED_LENGTHS: Dict[str, int] = Constant({
MONDO[0]: 7,
OMIM[0]: 6,
HPO[0]: 7,
HGNC[0]: 1, # HGNC ids aren't typically 0 padded, because they're not monsters
DOID[0]: None, # variable length with padded 0s
ORPHANET[0]: 1 # ORPHANET ids aren't typically 0 padded
ORPHANET[0]: 1, # ORPHANET ids aren't typically 0 padded
MEDGEN[0]: None
})

IMPORTANCE: Dict[str, int] = Constant({
Expand All @@ -62,7 +64,8 @@ class OntologyService(models.TextChoices):
HPO[0]: 4, # put HPO relationships last as they occasionally spam OMIM
DOID[0]: 5,
ORPHANET[0]: 6,
HGNC[0]: 1 # show gene relationships first
HGNC[0]: 1, # show gene relationships first
MEDGEN[0]: 7
})

URLS: Dict[str, str] = Constant({
Expand All @@ -71,7 +74,8 @@ class OntologyService(models.TextChoices):
HPO[0]: "https://hpo.jax.org/app/browse/term/HP:${1}",
HGNC[0]: "https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:${1}",
DOID[0]: "https://www.ebi.ac.uk/ols/ontologies/doid/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FDOID_${1}",
ORPHANET[0]: "https://www.orpha.net/consor/cgi-bin/OC_Exp.php?lng=EN&Expert=${1}"
ORPHANET[0]: "https://www.orpha.net/consor/cgi-bin/OC_Exp.php?lng=EN&Expert=${1}",
MEDGEN[0]: "https://www.ncbi.nlm.nih.gov/medgen/${1}"
})

LOCAL_ONTOLOGY_PREFIXES: Set[str] = Constant({
Expand All @@ -86,7 +90,8 @@ class OntologyService(models.TextChoices):
OMIM[0],
HPO[0],
DOID[0],
ORPHANET[0]
ORPHANET[0],
MEDGEN[0]
})

@staticmethod
Expand Down Expand Up @@ -239,6 +244,12 @@ class OntologyIdNormalized:
def num_part(self) -> int:
return int(self.postfix)

def num_part_safe(self) -> int:
    """
    Return the numeric part of this ID, or 0 if it is not a valid integer.

    ``num_part`` converts the postfix with ``int()``, which raises for
    identifiers that contain letters (e.g. some MedGen IDs). This variant
    falls back to 0 for those instead of raising.

    NOTE(review): this appears to be a plain method (no decorator is visible
    on it), so callers must invoke it as ``num_part_safe()`` - confirm that
    no caller reads it as a bare attribute.

    :return: the integer value of the postfix, or 0 when it cannot be parsed.
    """
    try:
        return self.num_part
    except (TypeError, ValueError):
        # Only conversion failures are treated as "no numeric part";
        # anything else (e.g. KeyboardInterrupt) must still propagate.
        return 0

@staticmethod
def normalize(dirty_id: str) -> 'OntologyIdNormalized':
parts = re.split("[:|_]", dirty_id)
Expand All @@ -250,17 +261,18 @@ def normalize(dirty_id: str) -> 'OntologyIdNormalized':
prefix = "Orphanet"
elif prefix.upper() == "MIM":
prefix = "OMIM"
elif prefix.upper() == "MEDGEN":
prefix = "MedGen"
prefix = OntologyService(prefix)
postfix = parts[1].strip()
try:
num_part = int(postfix)
num_part = str(postfix)
clean_id: str
if OntologyService.EXPECTED_LENGTHS[prefix]:
clean_id = OntologyService.index_to_id(prefix, num_part)
else:
# variable length IDs like DOID
clean_id = f"{prefix}:{postfix}"

return OntologyIdNormalized(prefix=prefix, postfix=postfix, full_id=clean_id, clean=True)

except ValueError:
Expand Down Expand Up @@ -443,10 +455,14 @@ def get_or_stub(id_str: Union[str, OntologyIdNormalized]) -> 'OntologyTerm':
if normal_id.clean:
if existing := OntologyTerm.objects.filter(id=normal_id.full_id).first():
return existing
try:
index_num_part_value = normal_id.num_part
except :
index_num_part_value = normal_id.num_part_safe # Ontologies like MedGen can have alpha characters in the "index", providing an index of 0 until we update the model
return OntologyTerm(
id=normal_id.full_id,
ontology_service=normal_id.prefix,
index=normal_id.num_part,
index=index_num_part_value,
name=""
)
else:
Expand Down
3 changes: 1 addition & 2 deletions ontology/models/ontology_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ def validate_ontology(term: OntologyTerm, preview_proxy: Optional[PreviewProxyMo
return SearchResult(preview, messages=messages)


ONTOLOGY_TERM_PATTERN = re.compile(r"^(MONDO|OMIM|MIM|HPO|HP|DOID|ORPHANET)\s*:\s*[0-9]+$", re.IGNORECASE)

# Both alternatives are wrapped in a single non-capturing group so that "^" and "$"
# anchor the whole expression; without it, "^" only applied to the numeric-ID branch
# and "$" only to the MedGen branch. Capture group numbering (1, 2, 3) is unchanged.
ONTOLOGY_TERM_PATTERN = re.compile(r"^(?:((MONDO|OMIM|MIM|HPO|HP|DOID|ORPHANET)\s*:\s*[0-9]+)|(MEDGEN\s*:\s*\w{0,2}[0-9]+))$", re.IGNORECASE)

@search_receiver(
search_type=OntologyTerm,
Expand Down
10 changes: 9 additions & 1 deletion snpdb/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,11 @@ def search_data(user: User, search_string: str, classify: bool = False) -> Searc
return SearchResponsesCombined(search_input, responses)


# Sentinel value: a bare object() is equal only to itself, so it cannot be
# confused with a genuine search result or with None.
INVALID_INPUT = object()
"""
Return this (and only this) from a search if you want to act as if the search pattern wasn't met
"""

def search_receiver(
search_type: Optional[PreviewCoordinator],
pattern: Pattern = HAS_ANYTHING,
Expand Down Expand Up @@ -870,7 +875,10 @@ def search_func(sender: Any, search_input: SearchInput, **kwargs):
# as Variants get merged into Alleles, we want to avoid limiting them (except under extreme conditions)
limit = MAX_VARIANT_RESULTS if search_type.preview_category() == "Variant" else MAX_RESULTS_PER_SEARCH
for result in func(SearchInputInstance(expected_type=search_type, search_input=search_input, match=match)):
if result is None:
if result == INVALID_INPUT:
matched_pattern = False
break
elif result is None:
raise ValueError(f"Search {sender.__name__} returned None")
elif isinstance(result, SearchMessageOverall):
overall_messages.add(result)
Expand Down
5 changes: 3 additions & 2 deletions snpdb/signals/variant_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from snpdb.models import Variant, LOCUS_PATTERN, LOCUS_NO_REF_PATTERN, DbSNP, DBSNP_PATTERN, VariantCoordinate, \
ClinGenAllele, GenomeBuild, Contig, HGVS_UNCLEANED_PATTERN
from snpdb.search import search_receiver, SearchInputInstance, SearchExample, SearchResult, SearchMessageOverall, \
SearchMessage
SearchMessage, INVALID_INPUT
from upload.models import ModifiedImportedVariant

COSMIC_PATTERN = re.compile(r"^(COS[VM])[0-9]{3,}$", re.IGNORECASE)
Expand Down Expand Up @@ -398,7 +398,6 @@ def _search_hgvs_using_gene_symbol(
elif not (settings.SEARCH_HGVS_GENE_SYMBOL_USE_MANE or settings.SEARCH_HGVS_GENE_SYMBOL_USE_ALL_TRANSCRIPTS):
yield SearchMessageOverall("\n".join(messages_as_strs))


@search_receiver(
search_type=Variant,
pattern=HGVS_UNCLEANED_PATTERN,
Expand All @@ -409,6 +408,8 @@ def _search_hgvs_using_gene_symbol(
)
)
def search_hgvs(search_input: SearchInputInstance) -> Iterable[SearchResult]:
if search_input.search_string.lower().startswith("medgen"):
return [INVALID_INPUT]
for_all_genome_builds = []
for genome_build in search_input.genome_builds:
for_all_genome_builds.append(_search_hgvs(hgvs_string=search_input.search_string, user=search_input.user, genome_build=genome_build, visible_variants=search_input.get_visible_variants(genome_build), classify=search_input.classify))
Expand Down

0 comments on commit a7d304a

Please sign in to comment.