Skip to content

Commit 7e4353d

Browse files
authored
Add ClinicalTrials.gov source (#307)
This PR adds an initial ClinicalTrials.gov source
1 parent 0d0098b commit 7e4353d

File tree

3 files changed

+161
-0
lines changed

3 files changed

+161
-0
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ dependencies = [
8484
"drugbank_downloader",
8585
"chembl_downloader",
8686
"umls_downloader>=0.1.3",
87+
"clinicaltrials_downloader>=0.0.2",
8788
"typing_extensions",
8889
"rdflib",
8990
]

src/pyobo/sources/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from .cgnc import CGNCGetter
99
from .chembl import ChEMBLCompoundGetter
1010
from .civic_gene import CIVICGeneGetter
11+
from .clinicaltrials import ClinicalTrialsGetter
1112
from .complexportal import ComplexPortalGetter
1213
from .conso import CONSOGetter
1314
from .cpt import CPTGetter
@@ -70,6 +71,7 @@
7071
"CPTGetter",
7172
"CVXGetter",
7273
"ChEMBLCompoundGetter",
74+
"ClinicalTrialsGetter",
7375
"ComplexPortalGetter",
7476
"CreditGetter",
7577
"DepMapGetter",

src/pyobo/sources/clinicaltrials.py

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
"""A source for ClinicalTrials.gov."""
2+
3+
from collections.abc import Iterable
4+
5+
from clinicaltrials_downloader import get_studies_slim
6+
7+
from pyobo import Obo, Reference, Term, TypeDef, default_reference
8+
from pyobo.struct.struct import CHARLIE_TERM, HUMAN_TERM, PYOBO_INJECTED
9+
from pyobo.struct.typedef import has_contributor
10+
11+
__all__ = [
12+
"ClinicalTrialsGetter",
13+
]
14+
15+
PREFIX = "clinicaltrials"

# Metadata-tag relations used below to annotate each study with MeSH references.
INVESTIGATES_CONDITION = TypeDef(
    reference=Reference(
        prefix=PREFIX,
        identifier="investigates_condition",
        name="investigates condition",
    ),
    is_metadata_tag=True,
)
HAS_INTERVENTION = TypeDef(
    reference=Reference(
        prefix=PREFIX,
        identifier="has_intervention",
        name="has intervention",
    ),
    is_metadata_tag=True,
)


def _child_term(identifier: str, name: str, parent: Term) -> Term:
    """Create a term via ``default_reference`` and append ``parent`` as its parent."""
    reference = default_reference(PREFIX, identifier, name=name)
    return Term(reference=reference).append_parent(parent)


# Hierarchy of study types; every study instance is attached to one of these.
STUDY_TERM = Term(reference=default_reference(PREFIX, "study", name="study"))
CLINICAL_TRIAL_TERM = _child_term("clinical-trial", "clinical trial", STUDY_TERM)
INTERVENTIONAL_CLINICAL_TRIAL_TERM = _child_term(
    "interventional-clinical-trial",
    "interventional clinical trial",
    CLINICAL_TRIAL_TERM,
)
RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM = _child_term(
    "randomized-interventional-clinical-trial",
    "randomized interventional clinical trial",
    INTERVENTIONAL_CLINICAL_TRIAL_TERM,
)
NON_RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM = _child_term(
    "non-randomized-interventional-clinical-trial",
    "non-randomized interventional clinical trial",
    INTERVENTIONAL_CLINICAL_TRIAL_TERM,
)
OBSERVATIONAL_CLINICAL_TRIAL_TERM = _child_term(
    "observational-clinical-trial",
    "observational clinical trial",
    CLINICAL_TRIAL_TERM,
)
EXPANDED_ACCESS_STUDY_TERM = _child_term(
    "expanded-access-study",
    "expanded access study",
    STUDY_TERM,
)

# Listed in the order in which they are emitted by the getter.
TERMS = [
    STUDY_TERM,
    CLINICAL_TRIAL_TERM,
    OBSERVATIONAL_CLINICAL_TRIAL_TERM,
    INTERVENTIONAL_CLINICAL_TRIAL_TERM,
    EXPANDED_ACCESS_STUDY_TERM,
    RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
    NON_RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
]

# Maps a (study type, allocation) pair to the parent term for a study.
# These were identified as the 4 possibilities for study
# types in ClinicalTrials.gov. See summary script at
# https://gist.github.com/cthoyt/12a3cb3c63ad68d73fe5a2f0d506526f
PARENTS: dict[tuple[str | None, str | None], Term] = {
    ("INTERVENTIONAL", None): INTERVENTIONAL_CLINICAL_TRIAL_TERM,
    ("INTERVENTIONAL", "NA"): INTERVENTIONAL_CLINICAL_TRIAL_TERM,
    ("INTERVENTIONAL", "RANDOMIZED"): RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
    ("INTERVENTIONAL", "NON_RANDOMIZED"): NON_RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
    ("OBSERVATIONAL", None): OBSERVATIONAL_CLINICAL_TRIAL_TERM,
    ("EXPANDED_ACCESS", None): EXPANDED_ACCESS_STUDY_TERM,
    (None, None): STUDY_TERM,
}
88+
89+
90+
class ClinicalTrialsGetter(Obo):
    """Get the ClinicalTrials.gov database as an ontology."""

    # Prefix under which this resource is registered
    ontology = PREFIX
    dynamic_version = True
    # Relations used by the terms this getter yields
    typedefs = [has_contributor, INVESTIGATES_CONDITION, HAS_INTERVENTION]
    # The generic "study" term is the root of the injected hierarchy
    root_terms = [STUDY_TERM.reference]

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Iterate over terms for studies.

        Yields the contributor term, the human term, the injected study-type
        hierarchy terms, and finally one term per study.

        :param force: Passed through to :func:`iterate_studies`, which hands it
            to ``get_studies_slim``.
        :returns: An iterable of terms
        """
        yield CHARLIE_TERM
        yield HUMAN_TERM
        # NOTE(review): this annotates the shared module-level terms on every
        # call; confirm that repeated calls do not accumulate duplicate
        # contributors/comments.
        for term in TERMS:
            term.append_contributor(CHARLIE_TERM)
            term.append_comment(PYOBO_INJECTED)
            yield term
        # Forward ``force`` so that it is not silently dropped
        yield from iterate_studies(force=force)
107+
108+
109+
def iterate_studies(*, force: bool = False) -> Iterable[Term]:
    """Lazily yield one term for each study returned by ``get_studies_slim``.

    :param force: Passed through to ``get_studies_slim``
    :returns: An iterable of study terms
    """
    yield from map(_process_study, get_studies_slim(force=force))
114+
115+
116+
def _process_study(raw_study) -> Term:
    """Convert a raw study record into an instance term.

    :param raw_study: A study record with ``protocolSection`` and
        ``derivedSection`` entries
    :returns: A term typed as an instance, with a parent study type and,
        where available, a synonym, PubMed cross-references, and MeSH annotations
    :raises KeyError: If a required key is missing or the combination of
        study type and allocation is not in ``PARENTS``
    """
    protocol = raw_study["protocolSection"]
    identification = protocol["identificationModule"]
    nct_id = identification["nctId"]

    # The official title is the label and the brief title a synonym, unless
    # only the brief title is available, in which case it becomes the label.
    official_title = identification.get("officialTitle")
    brief_title = identification.get("briefTitle")
    if official_title or not brief_title:
        name, synonym = official_title, brief_title
    else:
        name, synonym = brief_title, None

    study = Term(
        reference=Reference(prefix=PREFIX, identifier=nct_id, name=name),
        type="Instance",
    )
    if synonym:
        study.append_synonym(synonym)

    # Pick the parent from the (study type, allocation) pair
    design = protocol.get("designModule", {})
    parent_key = (
        design.get("studyType"),
        design.get("designInfo", {}).get("allocation"),
    )
    study.append_parent(PARENTS[parent_key])

    # Only references that carry a PubMed identifier are kept
    for citation in protocol.get("referencesModule", {}).get("references", []):
        pubmed_id = citation.get("pmid")
        if pubmed_id:
            study.append_see_also(Reference(prefix="pubmed", identifier=pubmed_id))

    derived = raw_study["derivedSection"]
    annotation_sources = (
        (INVESTIGATES_CONDITION, "conditionBrowseModule"),
        (HAS_INTERVENTION, "interventionBrowseModule"),
    )
    for typedef, module_key in annotation_sources:
        for mesh_record in derived.get(module_key, {}).get("meshes", []):
            study.annotate_object(typedef, _mesh(mesh_record))
    return study
149+
150+
151+
def _mesh(mesh_record: dict[str, str]) -> Reference:
    """Build a MeSH reference from a record with an ``id`` and an optional ``term``."""
    mesh_id = mesh_record["id"]
    label = mesh_record.get("term")
    if not label:
        # Missing and empty labels are both normalized to None
        label = None
    return Reference(prefix="mesh", identifier=mesh_id, name=label)
155+
156+
157+
# When executed as a script, invoke the getter's ``cli`` class-level entry point.
if __name__ == "__main__":
    ClinicalTrialsGetter.cli()

0 commit comments

Comments
 (0)