@staticmethod
def get_regulatory_approval_extension(
    therapy_norm_resp: "NormalizedTherapy"
) -> "List[Extension]":
    """Given therapy normalization service response, extract the regulatory
    approval extension.

    Only the first extension named ``regulatory_approval`` on the therapy
    descriptor is inspected. Its ``approval_ratings`` decide which source is
    reported:

    * ``"FDA"`` when an FDA prescription or FDA OTC rating is present, unless
      the therapy is also FDA-discontinued and has no ChEMBL phase 4 rating
      (in which case nothing is reported);
    * ``"ChEMBL"`` when there is no FDA prescription/OTC rating but a ChEMBL
      phase 4 rating is present.

    Indications are kept only when one of their own extensions carries the
    matching value (``"FDA"`` or ``"chembl_phase_4"``).

    :param NormalizedTherapy therapy_norm_resp: Response from normalizing therapy
    :return: List containing regulatory approval extension if it exists,
        otherwise an empty list
    """
    # ``dict.get(key, default)`` only falls back when the key is *missing*;
    # a key that is present with a ``None`` value would otherwise leak
    # through and break the chained lookups below, hence the ``or`` fallbacks.
    resp = therapy_norm_resp.dict()
    descriptor = resp.get("therapy_descriptor") or {}
    descriptor_exts = descriptor.get("extensions") or []
    regulatory_approval_extension = []

    for ext in descriptor_exts:
        if ext["name"] != "regulatory_approval":
            continue

        ext_value = ext["value"] or {}
        approval_ratings = ext_value.get("approval_ratings") or []
        if not approval_ratings:
            # Nothing to match; only the first regulatory_approval
            # extension is ever considered.
            break

        # Tuple membership compares by equality, same as the list it replaces.
        has_fda = any(
            rating in (ApprovalRating.FDA_PRESCRIPTION, ApprovalRating.FDA_OTC)
            for rating in approval_ratings
        )
        has_chembl_4 = ApprovalRating.CHEMBL_4 in approval_ratings

        matched_ext_value = None
        if has_fda:
            discontinued = ApprovalRating.FDA_DISCONTINUED in approval_ratings
            # A discontinued FDA approval only counts when ChEMBL phase 4
            # backs it up.
            if not discontinued or has_chembl_4:
                matched_ext_value = "FDA"
        elif has_chembl_4:
            matched_ext_value = "chembl_phase_4"

        if matched_ext_value:
            matched_indications = []
            for indication in ext_value.get("has_indication") or []:
                indication_exts = indication.get("extensions") or []
                # ``any`` keeps an indication from being listed twice when
                # more than one of its extensions matches.
                if any(indication_ext["value"] == matched_ext_value
                       for indication_ext in indication_exts):
                    matched_indications.append({
                        "id": indication["id"],
                        "type": indication["type"],
                        "label": indication["label"],
                        "disease_id": indication["disease_id"]
                    })

            approval_rating = "FDA" if matched_ext_value == "FDA" else "ChEMBL"
            regulatory_approval_extension.append(
                Extension(
                    name="regulatory_approval",
                    value={
                        "approval_rating": approval_rating,
                        "has_indications": matched_indications
                    }
                )
            )

        break

    return regulatory_approval_extension
therapy_descriptor.get("alternate_labels"), - "xrefs": therapy_descriptor.get("xrefs") + "xrefs": therapy_descriptor.get("xrefs"), + "extensions": [] } + key = "regulatory_approval" + val = therapy_descriptor.get(key) + if val: + td_params["extensions"].append(Extension(name=key, value=json.loads(val))) + else: + del td_params["extensions"] + with self.driver.session() as session: value_object = session.read_transaction( self._find_descriptor_value_object, td_params["id"] diff --git a/metakb/transform/civic.py b/metakb/transform/civic.py index 98a235d2..964a1ac1 100644 --- a/metakb/transform/civic.py +++ b/metakb/transform/civic.py @@ -679,7 +679,7 @@ def _get_therapy_descriptor(self, drug) \ ncit_id = f"ncit:{drug['ncit_id']}" queries = [ncit_id, label] - _, normalized_therapy_id = \ + therapy_norm_resp, normalized_therapy_id = \ self.vicc_normalizers.normalize_therapy(queries) if not normalized_therapy_id: @@ -687,13 +687,17 @@ def _get_therapy_descriptor(self, drug) \ f"using queries {ncit_id} and {label}") return None + regulatory_approval_extension = \ + self.vicc_normalizers.get_regulatory_approval_extension(therapy_norm_resp) # noqa: E501 + therapy_descriptor = ValueObjectDescriptor( id=therapy_id, type="TherapyDescriptor", label=label, therapy_id=normalized_therapy_id, alternate_labels=drug['aliases'], - xrefs=[ncit_id] + xrefs=[ncit_id], + extensions=regulatory_approval_extension if regulatory_approval_extension else None # noqa: E501 ).dict(exclude_none=True) return therapy_descriptor diff --git a/metakb/transform/moa.py b/metakb/transform/moa.py index b0fd38a8..dcca545a 100644 --- a/metakb/transform/moa.py +++ b/metakb/transform/moa.py @@ -381,12 +381,15 @@ def _get_therapy_descriptors(self, assertion): return [] if normalized_therapy_id: + regulatory_approval_extension = \ + self.vicc_normalizers.get_regulatory_approval_extension(therapy_norm_resp) # noqa: E501 therapy_descriptor = ValueObjectDescriptor( id=f"{schemas.SourceName.MOA.value}." 
f"{therapy_norm_resp.therapy_descriptor.id}", type="TherapyDescriptor", label=label, - therapy_id=normalized_therapy_id + therapy_id=normalized_therapy_id, + extensions=regulatory_approval_extension if regulatory_approval_extension else None # noqa: E501 ).dict(exclude_none=True) else: return [] diff --git a/tests/conftest.py b/tests/conftest.py index 5a500898..15210b7e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -173,6 +173,29 @@ def civic_tid146(): ], "xrefs": [ "ncit:C66940" + ], + "extensions": [ + { + "type": "Extension", + "name": "regulatory_approval", + "value": { + "approval_rating": "FDA", + "has_indications": [ + { + "id": "hemonc:25316", + "type": "DiseaseDescriptor", + "label": "Non-small cell lung cancer Squamous", + "disease_id": None + }, + { + "id": "hemonc:642", + "type": "DiseaseDescriptor", + "label": "Non-small cell lung cancer", + "disease_id": "ncit:C2926" + } + ] + } + } ] } @@ -1192,7 +1215,58 @@ def moa_imatinib(): "id": "moa.normalize.therapy:Imatinib", "type": "TherapyDescriptor", "label": "Imatinib", - "therapy_id": "rxcui:282388" + "therapy_id": "rxcui:282388", + "extensions": [{ + "type": "Extension", + "name": "regulatory_approval", + "value": { + "approval_rating": "FDA", + "has_indications": [ + { + "id": "hemonc:634", + "type": "DiseaseDescriptor", + "label": "Myelodysplastic syndrome", + "disease_id": "ncit:C3247" + }, + { + "id": "hemonc:616", + "type": "DiseaseDescriptor", + "label": "Hypereosinophilic syndrome", + "disease_id": "ncit:C27038" + }, + { + "id": "hemonc:582", + "type": "DiseaseDescriptor", + "label": "Chronic myelogenous leukemia", + "disease_id": "ncit:C3174" + }, + { + "id": "hemonc:669", + "type": "DiseaseDescriptor", + "label": "Systemic mastocytosis", + "disease_id": "ncit:C9235" + }, + { + "id": "hemonc:24309", + "type": "DiseaseDescriptor", + "label": "Acute lymphoblastic leukemia", + "disease_id": "ncit:C3167" + }, + { + "id": "hemonc:667", + "type": "DiseaseDescriptor", + "label": "Soft 
tissue sarcoma", + "disease_id": "ncit:C9306" + }, + { + "id": "hemonc:602", + "type": "DiseaseDescriptor", + "label": "Gastrointestinal stromal tumor", + "disease_id": "ncit:C3868" + } + ] + } + }] }