Skip to content

Commit

Permalink
wip: add test for transformer
Browse files Browse the repository at this point in the history
  • Loading branch information
korikuzma committed Jan 29, 2025
1 parent f11dc65 commit 879dbe1
Show file tree
Hide file tree
Showing 3 changed files with 281 additions and 29 deletions.
3 changes: 2 additions & 1 deletion src/metakb/transformers/civic.py
Original file line number Diff line number Diff line change
Expand Up @@ -836,7 +836,8 @@ def _add_genes(self, genes: list[dict]) -> None:
gene_norm_resp, normalized_gene_id = self.vicc_normalizers.normalize_gene(
queries
)
if not gene_norm_resp:

if not normalized_gene_id:
_logger.debug(
"Gene Normalizer unable to normalize: %s using queries %s",
gene_id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,20 +39,20 @@
"type": "disease"
},
"source": {
"id": 1725,
"name": "PubMed: Dungo et al., 2013",
"title": "Afatinib: first global approval.",
"citation": "Dungo et al., 2013",
"citation_id": "23982599",
"id": 123456789,
"name": "PubMed: Fake name",
"title": "My fake civic source",
"citation": "John Doe et al., 2022",
"citation_id": "123456789",
"source_type": "PUBMED",
"abstract": "Afatinib, an irreversible inhibitor of the ErbB family of tyrosine kinases, is under development with Boehringer Ingelheim for the once-daily, oral treatment of cancer. Afatinib downregulates ErbB signalling by covalently binding to epidermal growth factor receptor (EGFR), human epidermal growth factor receptor (HER) 2 and HER4, irreversibly inhibiting tyrosine kinase autophosphorylation. It also inhibits transphosphorylation of HER3. Oral afatinib (Gilotrif\u2122) has been approved in the US for the first-line treatment of patients with metastatic non-small-cell lung cancer (NSCLC) who have tumours with EGFR exon 19 deletions or exon 21 (L858R) substitution mutations as detected by a US FDA-approved test. Afatinib has also been approved in Taiwan for the first-line treatment of patients with EGFR mutation-positive NSCLC. In addition, the European Medicines Agency's Committee for Medicinal Products for Human Use has recommended the approval of afatinib (Giotrif\u00ae) for the treatment of patients with locally advanced or metastatic NSCLC with activating EGFR mutations who are EGFR tyrosine kinase inhibitor na\u00efve. Afatinib is also under regulatory review in Canada, Japan and other Asian countries. This article summarizes the milestones in the development of afatinib, leading to this first approval in patients with metastatic NSCLC.",
"abstract": "A really great abstract",
"asco_abstract_id": null,
"author_string": "Rosselle T Dungo, Gillian M Keating",
"author_string": "John Doe",
"full_journal_title": "Drugs",
"journal": "Drugs",
"pmc_id": null,
"publication_date": "2013-9",
"source_url": "http://www.ncbi.nlm.nih.gov/pubmed/23982599",
"publication_date": "2022-9",
"source_url": "http://www.ncbi.nlm.nih.gov/pubmed/123456789",
"clinical_trials": [],
"type": "source"
},
Expand All @@ -66,7 +66,7 @@
"id": 6,
"name": "BRCA1. This should fail normalization.",
"entrez_id": 0,
"description": "BRCA1 mutations in the germline have become a hallmark for hereditary breast and ovarian cancers. Variants that have been demonstrated to reduce the function of the protein have been shown to increase the risk for these cancers, as well as prostate and pancreatic cancer. These findings have been the impetus for the increased popularity of genetic testing of healthy individuals to assess risk. Recent studies in ovarian cancer have also demonstrated that BRCA mutation status can predict treatment response. A number of trials assessing BRCA mutation status have shown an improved response to platinum agents, and more recently has led to the FDA-approval of PARP inhibitors for BRCA-positive ovarian cancers. These studies have resulted in the Society of Gynecologic Oncology to recommend germline BRCA testing in all patients with a diagnosis of ovarian cancer.",
"description": "This is a fake gene that fails normalization.",
"sources": [
{
"id": 11,
Expand Down
287 changes: 269 additions & 18 deletions tests/unit/transformers/test_civic_transformer_therapeutic.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,59 @@
"""Test CIViC Transformation to common data model for Therapeutic Response."""

import json
from pathlib import Path

import pytest
import pytest_asyncio
from tests.conftest import TEST_TRANSFORMERS_DIR

from metakb.normalizers import ViccNormalizers
from metakb.transformers.civic import CivicTransformer

DATA_DIR = TEST_TRANSFORMERS_DIR / "therapeutic"
FILENAME = "civic_cdm.json"
NON_NORMALIZABLE_FILE_NAME = "civic_cdm2.json"
NORMALIZABLE_FILENAME = "civic_cdm.json"
NOT_NORMALIZABLE_FILE_NAME = "civic_cdm_normalization_failure.json"


@pytest_asyncio.fixture(scope="module")
async def normalizable_data(normalizers):
"""Create a CIViC Transformer test fixture."""
harvester_path = DATA_DIR / "civic_harvester.json"
async def _get_transformed_data(
harvester_path: Path, normalizers: ViccNormalizers, output_cdm_fn: str
) -> dict:
"""Get transformed data
:param harvester_path: Path to harvester file
:param normalizers: Vicc Normalizers
:param output_cdm_fn: Name of output CDM file
:return: Transformed data given harvester data
"""
c = CivicTransformer(
data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers
)
harvested_data = c.extract_harvested_data()
await c.transform(harvested_data)
c.create_json(DATA_DIR / FILENAME)
with (DATA_DIR / FILENAME).open() as f:
c.create_json(DATA_DIR / output_cdm_fn)
with (DATA_DIR / output_cdm_fn).open() as f:
return json.load(f)


@pytest_asyncio.fixture(scope="module")
async def normalizable_data(normalizers):
    """Provide transformed CIViC data built from ``civic_harvester.json``.

    :param normalizers: Normalizers fixture handed to the transformer
    :return: Transformed data loaded back from the generated CDM file
    """
    transformed = await _get_transformed_data(
        DATA_DIR / "civic_harvester.json",
        normalizers,
        NORMALIZABLE_FILENAME,
    )
    return transformed


@pytest_asyncio.fixture(scope="module")
async def not_normalizable_data(normalizers):
"""Create a CIViC Transformer test fixture for data that cannot be normalized."""
# NOTE: This file was manually generated to create a fake evidence item
# However, it does include some actual civic records that fail to normalize
# Gene record was modified to fail
harvester_path = DATA_DIR / "civic_harvester_not_normalizable.json"
c = CivicTransformer(
data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers
return await _get_transformed_data(
harvester_path, normalizers, NOT_NORMALIZABLE_FILE_NAME
)
harvested_data = c.extract_harvested_data()
await c.transform(harvested_data)
c.create_json(DATA_DIR / NON_NORMALIZABLE_FILE_NAME)
with (DATA_DIR / NON_NORMALIZABLE_FILE_NAME).open() as f:
return json.load(f)


@pytest.fixture(scope="module")
Expand All @@ -60,14 +72,253 @@ def statements(
]


@pytest.fixture(scope="module")
def civic_tid579():
    """Build the expected representation of CIViC therapy ID 579.

    :return: Therapy dict labelled "FOLFOX Regimen" with one NCIt exact-match
        mapping and two extensions: the ``vicc_normalizer_failure`` flag (set to
        ``True``) and the list of aliases
    """
    ncit_mapping = {
        "coding": {
            "id": "ncit:C11197",
            "code": "C11197",
            "system": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=",
        },
        "relation": "exactMatch",
    }
    aliases = [
        "CF/5-FU/L-OHP",
        "FOLFOX",
        "Fluorouracil/Leucovorin Calcium/Oxaliplatin",
    ]
    extensions = [
        {"name": "vicc_normalizer_failure", "value": True},
        {"name": "aliases", "value": aliases},
    ]
    return {
        "id": "civic.tid:579",
        "conceptType": "Therapy",
        "label": "FOLFOX Regimen",
        "mappings": [ncit_mapping],
        "extensions": extensions,
    }


@pytest.fixture(scope="module")
def civic_did3433():
    """Build the expected representation of CIViC disease ID 3433.

    :return: Disease dict whose only extension is the
        ``vicc_normalizer_failure`` flag set to ``True``
    """
    normalizer_failure = {"name": "vicc_normalizer_failure", "value": True}
    disease = {
        "id": "civic.did:3433",
        "conceptType": "Disease",
        "label": "B-lymphoblastic Leukemia/lymphoma With PAX5 P80R",
    }
    # Added last so the key order matches the order of the expected record
    disease["extensions"] = [normalizer_failure]
    return disease


@pytest.fixture(scope="session")
def civic_gid6_modified():
    """Build the expected representation of the modified CIViC gene ID 6.

    The gene record was altered so that it fails normalization.

    :return: Gene dict with an NCBI gene exact-match mapping (code ``0``) and
        extensions for the ``vicc_normalizer_failure`` flag, the description,
        and the aliases
    """
    ncbi_mapping = {
        "coding": {
            "id": "ncbigene:0",
            "code": "0",
            "system": "https://www.ncbi.nlm.nih.gov/gene/",
        },
        "relation": "exactMatch",
    }
    description = "This is a fake gene that fails normalization."
    aliases = ["Fake alias 1", "Fake alias 2"]
    extensions = [
        {"name": "vicc_normalizer_failure", "value": True},
        {"name": "description", "value": description},
        {"name": "aliases", "value": aliases},
    ]
    return {
        "id": "civic.gid:6",
        "conceptType": "Gene",
        "label": "BRCA1. This should fail normalization.",
        "mappings": [ncbi_mapping],
        "extensions": extensions,
    }


@pytest.fixture(scope="module")
def civic_mpid473():
    """Build the expected representation of CIViC MPID 473 (BRCA1 P968FS).

    :return: CategoricalVariant dict with three ``relatedMatch`` mappings
        followed by the CIViC variant ``exactMatch`` mapping, plus extensions
        for the ``vicc_normalizer_failure`` flag, aliases, representative
        coordinate, molecular profile score, variant types, and expressions
    """
    # (code, system) pairs; each becomes a relatedMatch mapping, in this order
    related_codings = [
        (
            "CA001889",
            "https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_canonicalid?canonicalid=",
        ),
        ("91602", "https://www.ncbi.nlm.nih.gov/clinvar/variation/"),
        ("rs398122670", "https://www.ncbi.nlm.nih.gov/snp/"),
    ]
    mappings = [
        {"coding": {"code": code, "system": system}, "relation": "relatedMatch"}
        for code, system in related_codings
    ]
    mappings.append(
        {
            "coding": {
                "id": "civic.vid:477",
                "code": "477",
                "system": "https://civicdb.org/variants/",
            },
            "relation": "exactMatch",
        }
    )

    representative_coordinate = {
        "chromosome": "17",
        "start": 41244645,
        "stop": 41244646,
        "variant_bases": "GA",
        "representative_transcript": "ENST00000471181.2",
        "ensembl_version": 75,
        "reference_build": "GRCh37",
        "type": "coordinates",
    }
    variant_types = [
        {
            "id": "SO:0001910",
            "code": "SO:0001910",
            "system": "http://www.sequenceontology.org/browser/current_svn/term/",
            "label": "frameshift_truncation",
        }
    ]
    # (syntax, value) pairs; order is kept as in the expected record
    hgvs_expressions = [
        ("hgvs.c", "NM_007294.3:c.2902_2903insTC"),
        ("hgvs.p", "NP_009225.1:p.Pro968Leufs"),
        ("hgvs.g", "NC_000017.10:g.41244645_41244646insGA"),
        ("hgvs.c", "ENST00000471181.2:c.2902_2903insTC"),
    ]
    expressions = [
        {"syntax": syntax, "value": value} for syntax, value in hgvs_expressions
    ]

    extensions = [
        {"name": "vicc_normalizer_failure", "value": True},
        {"name": "aliases", "value": ["3021INSTC", "PRO968LEUFS"]},
        {
            "name": "CIViC representative coordinate",
            "value": representative_coordinate,
        },
        {"name": "CIViC Molecular Profile Score", "value": 20.0},
        {"name": "Variant types", "value": variant_types},
        {"name": "expressions", "value": expressions},
    ]

    return {
        "id": "civic.mpid:473",
        "type": "CategoricalVariant",
        "label": "BRCA1 P968FS",
        "mappings": mappings,
        "extensions": extensions,
    }


@pytest.fixture(scope="session")
def civic_source123456789():
    """Build the expected Document for the fake CIViC source 123456789.

    :return: Document dict holding the fake source's id, label, title and PMID
    """
    document = {
        "id": "civic.source:123456789",
        "label": "John Doe et al., 2022",
        "title": "My fake civic source",
        "pmid": 123456789,
        "type": "Document",
    }
    return document


@pytest.fixture(scope="module")
def civic_not_normalizable_stmt(
    civic_tid579,
    civic_did3433,
    civic_gid6_modified,
    civic_mpid473,
    civic_method,
    civic_source123456789,
):
    """Build the expected fake CIViC statement whose gene, variant, disease, and
    therapy all fail to normalize.

    :param civic_tid579: Therapy used as the proposition's ``objectTherapeutic``
    :param civic_did3433: Disease used as the ``conditionQualifier``
    :param civic_gid6_modified: Gene used as the ``geneContextQualifier``
    :param civic_mpid473: Variant used as the ``subjectVariant``
    :param civic_method: Method the statement is ``specifiedBy``
    :param civic_source123456789: Document the statement is ``reportedIn``
    :return: Statement dict for ``civic.eid:123456789``
    """
    vicc_evidence_mapping = {
        "coding": {
            "id": "vicc:e000001",
            "system": "https://go.osu.edu/evidence-codes",
            "label": "authoritative evidence",
            "code": "e000001",
        },
        "relation": "exactMatch",
    }
    civic_level_mapping = {
        "coding": {
            "id": "civic.evidence_level:A",
            "system": "https://civic.readthedocs.io/en/latest/model/evidence/level.html",
            "code": "A",
        },
        "relation": "exactMatch",
    }
    strength = {
        "primaryCode": "e000001",
        "label": "authoritative evidence",
        "mappings": [vicc_evidence_mapping, civic_level_mapping],
    }
    proposition = {
        "type": "VariantTherapeuticResponseProposition",
        "predicate": "predictsSensitivityTo",
        "objectTherapeutic": civic_tid579,
        "conditionQualifier": civic_did3433,
        "alleleOriginQualifier": {"label": "somatic"},
        "geneContextQualifier": civic_gid6_modified,
        "subjectVariant": civic_mpid473,
    }
    return {
        "id": "civic.eid:123456789",
        "type": "Statement",
        "description": "This is a fake evidence item.",
        "direction": "supports",
        "strength": strength,
        "proposition": proposition,
        "specifiedBy": civic_method,
        "reportedIn": [civic_source123456789],
    }


def test_civic_cdm(normalizable_data, statements, check_transformed_cdm):
"""Test that civic transformation works correctly."""
check_transformed_cdm(normalizable_data, statements, DATA_DIR / FILENAME)
check_transformed_cdm(
normalizable_data, statements, DATA_DIR / NORMALIZABLE_FILENAME
)


def test_civic_cdm_not_normalizable(
not_normalizable_data, statements, check_transformed_cdm
not_normalizable_data, civic_not_normalizable_stmt, check_transformed_cdm
):
"""Test that civic transformation works correctly for CIViC records that cannot
normalize (gene, disease, variant, and therapy)
"""
check_transformed_cdm(
not_normalizable_data, statements, DATA_DIR / NON_NORMALIZABLE_FILE_NAME
not_normalizable_data,
[civic_not_normalizable_stmt],
DATA_DIR / NOT_NORMALIZABLE_FILE_NAME,
)

0 comments on commit 879dbe1

Please sign in to comment.