diff --git a/src/metakb/transformers/civic.py b/src/metakb/transformers/civic.py index 99e4e720..9d73d040 100644 --- a/src/metakb/transformers/civic.py +++ b/src/metakb/transformers/civic.py @@ -836,7 +836,8 @@ def _add_genes(self, genes: list[dict]) -> None: gene_norm_resp, normalized_gene_id = self.vicc_normalizers.normalize_gene( queries ) - if not gene_norm_resp: + + if not normalized_gene_id: _logger.debug( "Gene Normalizer unable to normalize: %s using queries %s", gene_id, diff --git a/tests/data/transformers/therapeutic/civic_harvester_not_normalizable.json b/tests/data/transformers/therapeutic/civic_harvester_not_normalizable.json index 63d740d8..e8ea15d3 100644 --- a/tests/data/transformers/therapeutic/civic_harvester_not_normalizable.json +++ b/tests/data/transformers/therapeutic/civic_harvester_not_normalizable.json @@ -39,20 +39,20 @@ "type": "disease" }, "source": { - "id": 1725, - "name": "PubMed: Dungo et al., 2013", - "title": "Afatinib: first global approval.", - "citation": "Dungo et al., 2013", - "citation_id": "23982599", + "id": 123456789, + "name": "PubMed: Fake name", + "title": "My fake civic source", + "citation": "John Doe et al., 2022", + "citation_id": "123456789", "source_type": "PUBMED", - "abstract": "Afatinib, an irreversible inhibitor of the ErbB family of tyrosine kinases, is under development with Boehringer Ingelheim for the once-daily, oral treatment of cancer. Afatinib downregulates ErbB signalling by covalently binding to epidermal growth factor receptor (EGFR), human epidermal growth factor receptor (HER) 2 and HER4, irreversibly inhibiting tyrosine kinase autophosphorylation. It also inhibits transphosphorylation of HER3. Oral afatinib (Gilotrif\u2122) has been approved in the US for the first-line treatment of patients with metastatic non-small-cell lung cancer (NSCLC) who have tumours with EGFR exon 19 deletions or exon 21 (L858R) substitution mutations as detected by a US FDA-approved test. Afatinib has also been approved in Taiwan for the first-line treatment of patients with EGFR mutation-positive NSCLC. In addition, the European Medicines Agency's Committee for Medicinal Products for Human Use has recommended the approval of afatinib (Giotrif\u00ae) for the treatment of patients with locally advanced or metastatic NSCLC with activating EGFR mutations who are EGFR tyrosine kinase inhibitor na\u00efve. Afatinib is also under regulatory review in Canada, Japan and other Asian countries. This article summarizes the milestones in the development of afatinib, leading to this first approval in patients with metastatic NSCLC.", + "abstract": "A really great abstract", "asco_abstract_id": null, - "author_string": "Rosselle T Dungo, Gillian M Keating", + "author_string": "John Doe", "full_journal_title": "Drugs", "journal": "Drugs", "pmc_id": null, - "publication_date": "2013-9", - "source_url": "http://www.ncbi.nlm.nih.gov/pubmed/23982599", + "publication_date": "2022-9", + "source_url": "http://www.ncbi.nlm.nih.gov/pubmed/123456789", "clinical_trials": [], "type": "source" }, @@ -66,7 +66,7 @@ "id": 6, "name": "BRCA1. This should fail normalization.", "entrez_id": 0, - "description": "BRCA1 mutations in the germline have become a hallmark for hereditary breast and ovarian cancers. Variants that have been demonstrated to reduce the function of the protein have been shown to increase the risk for these cancers, as well as prostate and pancreatic cancer. These findings have been the impetus for the increased popularity of genetic testing of healthy individuals to assess risk. Recent studies in ovarian cancer have also demonstrated that BRCA mutation status can predict treatment response. A number of trials assessing BRCA mutation status have shown an improved response to platinum agents, and more recently has led to the FDA-approval of PARP inhibitors for BRCA-positive ovarian cancers. These studies have resulted in the Society of Gynecologic Oncology to recommend germline BRCA testing in all patients with a diagnosis of ovarian cancer.", + "description": "This is a fake gene that fails normalization.", "sources": [ { "id": 11, diff --git a/tests/unit/transformers/test_civic_transformer_therapeutic.py b/tests/unit/transformers/test_civic_transformer_therapeutic.py index 3418522c..1b440f27 100644 --- a/tests/unit/transformers/test_civic_transformer_therapeutic.py +++ b/tests/unit/transformers/test_civic_transformer_therapeutic.py @@ -1,32 +1,49 @@ """Test CIViC Transformation to common data model for Therapeutic Response.""" import json +from pathlib import Path import pytest import pytest_asyncio from tests.conftest import TEST_TRANSFORMERS_DIR +from metakb.normalizers import ViccNormalizers from metakb.transformers.civic import CivicTransformer DATA_DIR = TEST_TRANSFORMERS_DIR / "therapeutic" -FILENAME = "civic_cdm.json" -NON_NORMALIZABLE_FILE_NAME = "civic_cdm2.json" +NORMALIZABLE_FILENAME = "civic_cdm.json" +NOT_NORMALIZABLE_FILE_NAME = "civic_cdm_normalization_failure.json" -@pytest_asyncio.fixture(scope="module") -async def normalizable_data(normalizers): - """Create a CIViC Transformer test fixture.""" - harvester_path = DATA_DIR / "civic_harvester.json" +async def _get_transformed_data( + harvester_path: Path, normalizers: ViccNormalizers, output_cdm_fn: str +) -> dict: + """Get transformed data + + :param harvester_path: Path to harvester file + :param normalizers: Vicc Normalizers + :param output_cdm_fn: Name of output CDM file + :return: Transformed data given harvester data + """ c = CivicTransformer( data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers ) harvested_data = c.extract_harvested_data() await c.transform(harvested_data) - c.create_json(DATA_DIR / FILENAME) - with (DATA_DIR / FILENAME).open() as f: + c.create_json(DATA_DIR / output_cdm_fn) + with (DATA_DIR / output_cdm_fn).open() as f: return json.load(f) +@pytest_asyncio.fixture(scope="module") +async def normalizable_data(normalizers): + """Create a CIViC Transformer test fixture.""" + harvester_path = DATA_DIR / "civic_harvester.json" + return await _get_transformed_data( + harvester_path, normalizers, NORMALIZABLE_FILENAME + ) + + @pytest_asyncio.fixture(scope="module") async def not_normalizable_data(normalizers): """Create a CIViC Transformer test fixture for data that cannot be normalized.""" @@ -34,14 +51,9 @@ async def not_normalizable_data(normalizers): # However, it does include some actual civic records that fail to normalize # Gene record was modified to fail harvester_path = DATA_DIR / "civic_harvester_not_normalizable.json" - c = CivicTransformer( - data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers + return await _get_transformed_data( + harvester_path, normalizers, NOT_NORMALIZABLE_FILE_NAME ) - harvested_data = c.extract_harvested_data() - await c.transform(harvested_data) - c.create_json(DATA_DIR / NON_NORMALIZABLE_FILE_NAME) - with (DATA_DIR / NON_NORMALIZABLE_FILE_NAME).open() as f: - return json.load(f) @pytest.fixture(scope="module") @@ -60,14 +72,253 @@ def statements( ] +@pytest.fixture(scope="module") +def civic_tid579(): + """Create test fixture for CIViC therapy ID 579""" + return { + "id": "civic.tid:579", + "conceptType": "Therapy", + "label": "FOLFOX Regimen", + "mappings": [ + { + "coding": { + "id": "ncit:C11197", + "code": "C11197", + "system": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=", + }, + "relation": "exactMatch", + }, + ], + "extensions": [ + {"name": "vicc_normalizer_failure", "value": True}, + { + "name": "aliases", + "value": [ + "CF/5-FU/L-OHP", + "FOLFOX", + "Fluorouracil/Leucovorin Calcium/Oxaliplatin", + ], + }, + ], + } + + +@pytest.fixture(scope="module") +def civic_did3433(): + """Create test fixture for CIViC DID3433.""" + return { + "id": "civic.did:3433", + "conceptType": "Disease", + "label": "B-lymphoblastic Leukemia/lymphoma With PAX5 P80R", + "extensions": [ + {"name": "vicc_normalizer_failure", "value": True}, + ], + } + + +@pytest.fixture(scope="session") +def civic_gid6_modified(): + """Create test fixture for CIViC GID6, which has been modified to fail normalization.""" + return { + "id": "civic.gid:6", + "conceptType": "Gene", + "label": "BRCA1. This should fail normalization.", + "mappings": [ + { + "coding": { + "id": "ncbigene:0", + "code": "0", + "system": "https://www.ncbi.nlm.nih.gov/gene/", + }, + "relation": "exactMatch", + }, + ], + "extensions": [ + {"name": "vicc_normalizer_failure", "value": True}, + { + "name": "description", + "value": "This is a fake gene that fails normalization.", + }, + { + "name": "aliases", + "value": ["Fake alias 1", "Fake alias 2"], + }, + ], + } + + +@pytest.fixture(scope="module") +def civic_mpid473(): + """Create CIViC MPID 473""" + return { + "id": "civic.mpid:473", + "type": "CategoricalVariant", + "label": "BRCA1 P968FS", + "mappings": [ + { + "coding": { + "code": "CA001889", + "system": "https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_canonicalid?canonicalid=", + }, + "relation": "relatedMatch", + }, + { + "coding": { + "code": "91602", + "system": "https://www.ncbi.nlm.nih.gov/clinvar/variation/", + }, + "relation": "relatedMatch", + }, + { + "coding": { + "code": "rs398122670", + "system": "https://www.ncbi.nlm.nih.gov/snp/", + }, + "relation": "relatedMatch", + }, + { + "coding": { + "id": "civic.vid:477", + "code": "477", + "system": "https://civicdb.org/variants/", + }, + "relation": "exactMatch", + }, + ], + "extensions": [ + {"name": "vicc_normalizer_failure", "value": True}, + { + "name": "aliases", + "value": [ + "3021INSTC", + "PRO968LEUFS", + ], + }, + { + "name": "CIViC representative coordinate", + "value": { + "chromosome": "17", + "start": 41244645, + "stop": 41244646, + "variant_bases": "GA", + "representative_transcript": "ENST00000471181.2", + "ensembl_version": 75, + "reference_build": "GRCh37", + "type": "coordinates", + }, + }, + { + "name": "CIViC Molecular Profile Score", + "value": 20.0, + }, + { + "name": "Variant types", + "value": [ + { + "id": "SO:0001910", + "code": "SO:0001910", + "system": "http://www.sequenceontology.org/browser/current_svn/term/", + "label": "frameshift_truncation", + } + ], + }, + { + "name": "expressions", + "value": [ + {"syntax": "hgvs.c", "value": "NM_007294.3:c.2902_2903insTC"}, + {"syntax": "hgvs.p", "value": "NP_009225.1:p.Pro968Leufs"}, + { + "syntax": "hgvs.g", + "value": "NC_000017.10:g.41244645_41244646insGA", + }, + {"syntax": "hgvs.c", "value": "ENST00000471181.2:c.2902_2903insTC"}, + ], + }, + ], + } + + +@pytest.fixture(scope="session") +def civic_source123456789(): + """Create fixture for a fake civic source 123456789""" + return { + "id": "civic.source:123456789", + "label": "John Doe et al., 2022", + "title": "My fake civic source", + "pmid": 123456789, + "type": "Document", + } + + +@pytest.fixture(scope="module") +def civic_not_normalizable_stmt( + civic_tid579, + civic_did3433, + civic_gid6_modified, + civic_mpid473, + civic_method, + civic_source123456789, +): + """Create test fixture for fake civic statement that fails to normalize gene, + variant, disease, and therapy. + """ + return { + "id": "civic.eid:123456789", + "type": "Statement", + "description": "This is a fake evidence item.", + "direction": "supports", + "strength": { + "primaryCode": "e000001", + "label": "authoritative evidence", + "mappings": [ + { + "coding": { + "id": "vicc:e000001", + "system": "https://go.osu.edu/evidence-codes", + "label": "authoritative evidence", + "code": "e000001", + }, + "relation": "exactMatch", + }, + { + "coding": { + "id": "civic.evidence_level:A", + "system": "https://civic.readthedocs.io/en/latest/model/evidence/level.html", + "code": "A", + }, + "relation": "exactMatch", + }, + ], + }, + "proposition": { + "type": "VariantTherapeuticResponseProposition", + "predicate": "predictsSensitivityTo", + "objectTherapeutic": civic_tid579, + "conditionQualifier": civic_did3433, + "alleleOriginQualifier": {"label": "somatic"}, + "geneContextQualifier": civic_gid6_modified, + "subjectVariant": civic_mpid473, + }, + "specifiedBy": civic_method, + "reportedIn": [civic_source123456789], + } + + def test_civic_cdm(normalizable_data, statements, check_transformed_cdm): """Test that civic transformation works correctly.""" - check_transformed_cdm(normalizable_data, statements, DATA_DIR / FILENAME) + check_transformed_cdm( + normalizable_data, statements, DATA_DIR / NORMALIZABLE_FILENAME + ) def test_civic_cdm_not_normalizable( - not_normalizable_data, statements, check_transformed_cdm + not_normalizable_data, civic_not_normalizable_stmt, check_transformed_cdm ): + """Test that civic transformation works correctly for CIViC records that cannot + normalize (gene, disease, variant, and therapy) + """ check_transformed_cdm( - not_normalizable_data, statements, DATA_DIR / NON_NORMALIZABLE_FILE_NAME + not_normalizable_data, + [civic_not_normalizable_stmt], + DATA_DIR / NOT_NORMALIZABLE_FILE_NAME, )