Skip to content

Commit a9a2042

Browse files
committed
Merge branch 'issue-426' into disease-conflict
2 parents 1a0ffdc + 9d96296 commit a9a2042

File tree

6 files changed

+166
-71
lines changed

6 files changed

+166
-71
lines changed

src/metakb/transformers/base.py

Lines changed: 43 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,6 @@
99
from pathlib import Path
1010
from typing import ClassVar, TypeVar
1111

12-
from disease.schemas import (
13-
SYSTEM_URI_TO_NAMESPACE as DISEASE_SYSTEM_URI_TO_NAMESPACE,
14-
)
1512
from disease.schemas import (
1613
NamespacePrefix as DiseaseNamespacePrefix,
1714
)
@@ -35,7 +32,12 @@
3532
)
3633
from ga4gh.va_spec.base import Document, Method, TherapyGroup
3734
from ga4gh.vrs.models import Allele
38-
from gene.schemas import NormalizeService as NormalizedGene
35+
from gene.schemas import (
36+
NamespacePrefix as GeneNamespacePrefix,
37+
)
38+
from gene.schemas import (
39+
NormalizeService as NormalizedGene,
40+
)
3941
from pydantic import BaseModel, Field, StrictStr, ValidationError
4042
from therapy.schemas import NormalizationService as NormalizedTherapy
4143

@@ -557,20 +559,24 @@ def _get_vicc_normalizer_mappings(
557559
:return: List of VICC Normalizer data represented as mappable concept
558560
"""
559561

560-
def _add_merged_id_ext(
562+
def _update_mapping(
561563
mapping: ConceptMapping,
562-
is_priority: bool,
563-
label: str | None = None,
564+
normalized_id: str,
565+
normalizer_label: str,
564566
) -> Extension:
565-
"""Update ``mapping`` to include extension on whether mapping is from merged identifier
567+
"""Update ``mapping`` to include extension on whether ``mapping`` contains
568+
code that matches the merged record's primary identifier.
566569
567570
:param mapping: ConceptMapping from vicc normalizer. This will be mutated.
568-
:param is_priority: ``True`` if concept mapping contains primaryCode that
569-
matches merged record primaryCode. ``False`` otherwise (meaning it comes
570-
from merged record mappings)
571-
:param label: Merged concept label, if found
572-
:return: ConceptMapping with normalizer extension added
571+
Extensions will be added. Label will be added if mapping identifier
572+
matches normalized merged identifier.
573+
:param normalized_id: Concept ID from normalized record
574+
:param normalizer_label: Label from normalized record
575+
:return: ConceptMapping with normalizer extension added as well as label (
576+
if mapping id matches normalized merged id)
573577
"""
578+
is_priority = normalized_id == mapping.coding.code.root
579+
574580
merged_id_ext = Extension(
575581
name=NormalizerExtensionName.PRIORITY.value, value=is_priority
576582
)
@@ -579,40 +585,40 @@ def _add_merged_id_ext(
579585
else:
580586
mapping.extensions = [merged_id_ext]
581587

582-
if label:
583-
mapping.coding.label = label
588+
if is_priority:
589+
mapping.coding.label = normalizer_label
584590

585591
return mapping
586592

587593
mappings: list[ConceptMapping] = []
588594
attr_name = NORMALIZER_INSTANCE_TO_ATTR[type(normalizer_resp)]
589595
normalizer_resp_obj = getattr(normalizer_resp, attr_name)
596+
normalizer_label = normalizer_resp_obj.label
597+
is_disease = isinstance(normalizer_resp, NormalizedDisease)
598+
is_gene = isinstance(normalizer_resp, NormalizedGene)
590599

591600
normalizer_mappings = normalizer_resp_obj.mappings or []
592-
if isinstance(normalizer_resp, NormalizedDisease):
593-
for mapping in normalizer_mappings:
601+
for mapping in normalizer_mappings:
602+
if normalized_id == mapping.coding.code.root:
603+
mappings.append(
604+
_update_mapping(mapping, normalized_id, normalizer_label)
605+
)
606+
else:
607+
mapping_code_lower = mapping.coding.code.root.lower()
594608
if (
595-
DISEASE_SYSTEM_URI_TO_NAMESPACE.get(mapping.coding.system)
596-
== DiseaseNamespacePrefix.MONDO.value
609+
is_disease
610+
and mapping_code_lower.startswith(
611+
DiseaseNamespacePrefix.MONDO.value
612+
)
613+
) or (
614+
is_gene
615+
and mapping_code_lower.startswith(
616+
(GeneNamespacePrefix.NCBI.value, GeneNamespacePrefix.HGNC.value)
617+
)
597618
):
598-
mappings.append(_add_merged_id_ext(mapping, is_priority=False))
599-
else:
600-
if normalized_id == mapping.coding.code.root:
601-
mappings.append(
602-
_add_merged_id_ext(
603-
mapping,
604-
label=normalizer_resp_obj.label,
605-
is_priority=True,
606-
)
607-
)
608-
else:
609-
mappings.extend(
610-
_add_merged_id_ext(
611-
mapping, label=normalizer_resp_obj.label, is_priority=True
612-
)
613-
for mapping in normalizer_mappings
614-
if normalized_id == mapping.coding.code.root
615-
)
619+
mappings.append(
620+
_update_mapping(mapping, normalized_id, normalizer_label)
621+
)
616622
return mappings
617623

618624
def create_json(self, cdm_filepath: Path | None = None) -> None:

src/metakb/transformers/civic.py

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -816,20 +816,27 @@ def _add_genes(self, genes: list[dict]) -> None:
816816
817817
:param genes: All genes in CIViC
818818
"""
819+
820+
def _get_ncbi_concept_mapping(ncbigene_id: str, gene: dict) -> ConceptMapping:
821+
"""Get NCBI gene mapping
822+
823+
:param ncbigene_id: ID for NCBI Gene
824+
:param gene: CIViC gene record
825+
:return: Concept Mapping for NCBI Gene
826+
"""
827+
return ConceptMapping(
828+
coding=Coding(
829+
id=ncbigene_id,
830+
code=str(gene["entrez_id"]),
831+
system="https://www.ncbi.nlm.nih.gov/gene/",
832+
),
833+
relation=Relation.EXACT_MATCH,
834+
)
835+
819836
for gene in genes:
820837
gene_id = f"civic.gid:{gene['id']}"
821838
ncbigene = f"ncbigene:{gene['entrez_id']}"
822839
queries = [ncbigene, gene["name"]] + gene["aliases"]
823-
mappings = [
824-
ConceptMapping(
825-
coding=Coding(
826-
id=ncbigene,
827-
code=str(gene["entrez_id"]),
828-
system="https://www.ncbi.nlm.nih.gov/gene/",
829-
),
830-
relation=Relation.EXACT_MATCH,
831-
),
832-
]
833840
extensions = []
834841

835842
gene_norm_resp, normalized_gene_id = self.vicc_normalizers.normalize_gene(
@@ -843,13 +850,31 @@ def _add_genes(self, genes: list[dict]) -> None:
843850
queries,
844851
)
845852
extensions.append(self._get_vicc_normalizer_failure_ext())
853+
mappings = [_get_ncbi_concept_mapping(ncbigene, gene)]
846854
else:
847-
mappings.extend(
848-
self._get_vicc_normalizer_mappings(
849-
normalized_gene_id, gene_norm_resp
850-
)
855+
mappings = self._get_vicc_normalizer_mappings(
856+
normalized_gene_id, gene_norm_resp
851857
)
852858

859+
civic_ncbi_annotation_match = False
860+
for mapping in mappings:
861+
if mapping.coding.code.root.startswith("ncbigene:"):
862+
if mapping.coding.code.root == ncbigene:
863+
mapping.extensions.append(
864+
Extension(name="civic_annotation", value=True)
865+
)
866+
civic_ncbi_annotation_match = True
867+
break
868+
869+
_logger.debug(
870+
"CIViC NCBI gene and Gene Normalizer mismatch: %s vs %s",
871+
ncbigene,
872+
mapping.coding.code.root,
873+
)
874+
875+
if not civic_ncbi_annotation_match:
876+
mappings.append(_get_ncbi_concept_mapping(ncbigene, gene))
877+
853878
if gene["aliases"]:
854879
extensions.append(Extension(name="aliases", value=gene["aliases"]))
855880

tests/conftest.py

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@ def pytest_configure(config):
4646
logging.getLogger(lib).setLevel(logging.ERROR)
4747

4848

49+
def get_vicc_normalizer_ext(is_priority: bool):
50+
"""Create test fixture for vicc normalizer priority extension"""
51+
return [{"name": "vicc_normalizer_priority", "value": is_priority}]
52+
53+
4954
def check_source_harvest(tmp_path: Path, harvester: Harvester):
5055
"""Test that source harvest method works correctly"""
5156
harvested_data = harvester.harvest()
@@ -499,11 +504,14 @@ def civic_gid5(braf_normalizer_mappings):
499504
"mappings": [
500505
{
501506
"coding": {
502-
"id": "ncbigene:673",
503-
"code": "673",
507+
"code": "ncbigene:673",
504508
"system": "https://www.ncbi.nlm.nih.gov/gene/",
505509
},
506-
"relation": "exactMatch",
510+
"relation": "relatedMatch",
511+
"extensions": [
512+
*get_vicc_normalizer_ext(is_priority=False),
513+
{"name": "civic_annotation", "value": True},
514+
],
507515
},
508516
*braf_normalizer_mappings,
509517
],
@@ -732,11 +740,14 @@ def civic_gid19():
732740
"mappings": [
733741
{
734742
"coding": {
735-
"id": "ncbigene:1956",
736-
"code": "1956",
743+
"code": "ncbigene:1956",
737744
"system": "https://www.ncbi.nlm.nih.gov/gene/",
738745
},
739-
"relation": "exactMatch",
746+
"relation": "relatedMatch",
747+
"extensions": [
748+
*get_vicc_normalizer_ext(is_priority=False),
749+
{"name": "civic_annotation", "value": True},
750+
],
740751
},
741752
{
742753
"coding": {
@@ -1568,10 +1579,13 @@ def civic_gid29():
15681579
{
15691580
"coding": {
15701581
"system": "https://www.ncbi.nlm.nih.gov/gene/",
1571-
"id": "ncbigene:3815",
1572-
"code": "3815",
1582+
"code": "ncbigene:3815",
15731583
},
1574-
"relation": "exactMatch",
1584+
"relation": "relatedMatch",
1585+
"extensions": [
1586+
*get_vicc_normalizer_ext(is_priority=False),
1587+
{"name": "civic_annotation", "value": True},
1588+
],
15751589
},
15761590
{
15771591
"coding": {
@@ -1781,8 +1795,16 @@ def moa_abl1():
17811795
"system": "https://www.genenames.org",
17821796
},
17831797
"relation": "exactMatch",
1784-
"extensions": get_vicc_normalizer_priority_ext(is_priority=True),
1785-
}
1798+
"extensions": get_vicc_normalizer_ext(is_priority=True),
1799+
},
1800+
{
1801+
"coding": {
1802+
"code": "ncbigene:25",
1803+
"system": "https://www.ncbi.nlm.nih.gov/gene/",
1804+
},
1805+
"relation": "relatedMatch",
1806+
"extensions": get_vicc_normalizer_ext(is_priority=False),
1807+
},
17861808
],
17871809
}
17881810

tests/unit/transformers/test_civic_transformer_diagnostic.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@
44

55
import pytest
66
import pytest_asyncio
7-
from tests.conftest import TEST_TRANSFORMERS_DIR, get_vicc_normalizer_priority_ext
7+
from tests.conftest import (
8+
TEST_TRANSFORMERS_DIR,
9+
get_vicc_normalizer_ext,
10+
get_vicc_normalizer_priority_ext,
11+
)
812

913
from metakb.transformers.civic import CivicTransformer
1014

@@ -184,11 +188,14 @@ def civic_gid38():
184188
"mappings": [
185189
{
186190
"coding": {
187-
"id": "ncbigene:5156",
188-
"code": "5156",
191+
"code": "ncbigene:5156",
189192
"system": "https://www.ncbi.nlm.nih.gov/gene/",
190193
},
191-
"relation": "exactMatch",
194+
"relation": "relatedMatch",
195+
"extensions": [
196+
*get_vicc_normalizer_ext(is_priority=False),
197+
{"name": "civic_annotation", "value": True},
198+
],
192199
},
193200
{
194201
"coding": {
@@ -460,11 +467,14 @@ def civic_gid42():
460467
"mappings": [
461468
{
462469
"coding": {
463-
"id": "ncbigene:5979",
464-
"code": "5979",
470+
"code": "ncbigene:5979",
465471
"system": "https://www.ncbi.nlm.nih.gov/gene/",
466472
},
467-
"relation": "exactMatch",
473+
"relation": "relatedMatch",
474+
"extensions": [
475+
*get_vicc_normalizer_ext(is_priority=False),
476+
{"name": "civic_annotation", "value": True},
477+
],
468478
},
469479
{
470480
"coding": {

tests/unit/transformers/test_moa_transformer_prognostic.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@
44

55
import pytest
66
import pytest_asyncio
7-
from tests.conftest import TEST_TRANSFORMERS_DIR, get_vicc_normalizer_priority_ext
7+
from tests.conftest import (
8+
TEST_TRANSFORMERS_DIR,
9+
get_vicc_normalizer_ext,
10+
get_vicc_normalizer_priority_ext,
11+
)
812

913
from metakb.transformers.moa import MoaTransformer
1014

@@ -162,6 +166,14 @@ def moa_bcor():
162166
"relation": "exactMatch",
163167
"extensions": get_vicc_normalizer_priority_ext(is_priority=True),
164168
},
169+
{
170+
"coding": {
171+
"code": "ncbigene:54880",
172+
"system": "https://www.ncbi.nlm.nih.gov/gene/",
173+
},
174+
"relation": "relatedMatch",
175+
"extensions": get_vicc_normalizer_ext(is_priority=False),
176+
},
165177
],
166178
}
167179

@@ -329,6 +341,14 @@ def moa_sf3b1():
329341
"relation": "exactMatch",
330342
"extensions": get_vicc_normalizer_priority_ext(is_priority=True),
331343
},
344+
{
345+
"coding": {
346+
"code": "ncbigene:23451",
347+
"system": "https://www.ncbi.nlm.nih.gov/gene/",
348+
},
349+
"relation": "relatedMatch",
350+
"extensions": get_vicc_normalizer_ext(is_priority=False),
351+
},
332352
],
333353
}
334354

0 commit comments

Comments
 (0)