Skip to content

Commit

Permalink
feat: implement query functions for bridge term ids (#240)
Browse files Browse the repository at this point in the history
  • Loading branch information
nayib-jose-gloria authored Oct 31, 2024
1 parent 956c9da commit c808bd5
Show file tree
Hide file tree
Showing 19 changed files with 229 additions and 16 deletions.
2 changes: 1 addition & 1 deletion api/python/ontology-assets-version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2d8e0e937424cb3ee4c65d8b2cf2fa9fd1f5e1db
c44a803df9554ef534839ad490545b6f236be61c
61 changes: 61 additions & 0 deletions api/python/src/cellxgene_ontology_guide/ontology_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,3 +664,64 @@ def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[
"""
ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
return ontology_term_label_to_id_map.get(term_label)

def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
    """
    Look up the exact-match counterpart of a term in another ontology.

    Only a match recorded directly on the term is returned; ancestors are never consulted.
    Returns None when the term has no recorded match in cross_ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
    'UBERON:0000468'

    :param term_id: str ontology term to find equivalent term for
    :param cross_ontology: str name of ontology to search for equivalent term in
    :return: Optional[str] equivalent term ID from the cross_ontology, or None if there is none
    :raises ValueError: if cross_ontology is not in the set of supported cross ontology mappings.
        NOTE(review): a term_id from an unsupported ontology is presumably rejected by
        _parse_ontology_name as well - confirm against that helper.
    """
    # Validate the target ontology before touching the term itself.
    if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
        raise ValueError(
            f"{cross_ontology} is not in the set of supported cross ontology mappings "
            f"{self.cxg_schema.cross_ontology_mappings}."
        )

    source_ontology = self._parse_ontology_name(term_id)
    term_entry = self.cxg_schema.ontology(source_ontology)[term_id]
    exact_matches = term_entry.get("cross_ontology_terms")
    # A missing key and an empty mapping both mean "no exact match".
    if not exact_matches:
        return None
    return exact_matches.get(cross_ontology)

def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
    """
    For a given term ID, fetch the closest equivalent term ID(s) from a given ontology.

    If the term itself has an exact match, returns a list containing only that match. Otherwise the
    ancestors of the term are searched level by level (parents, then grandparents, ...) and the
    matches found at the nearest level that has any are returned.
    If multiple ancestors at the same distance have matches, all of them are returned, each ID
    appearing once, in the order they were first encountered.
    If no applicable match is found, returns an empty list.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
    ['UBERON:0000476', 'UBERON:0000920']

    :param term_id: str ontology term to find closest term for
    :param cross_ontology: str name of ontology to search for closest term in
    :return: List[str] list of closest term IDs from the cross_ontology
    :raises ValueError: propagated from get_bridge_term_id, e.g. when cross_ontology is not a
        supported cross ontology mapping
    """
    # Terms already queued once. An ancestor reachable through several paths is only examined at
    # its shortest distance, which also guarantees termination should the parent relation loop.
    seen = {term_id}
    frontier = [term_id]
    while frontier:
        matches = [
            bridge_term
            for term in frontier
            if (bridge_term := self.get_bridge_term_id(term, cross_ontology))
        ]
        if matches:
            # Different terms on the same level may map to the same bridge term; report it once
            # while keeping first-seen order.
            return list(dict.fromkeys(matches))

        next_frontier: List[str] = []
        for child in frontier:
            for parent in self.get_term_parents(child):
                if parent not in seen:
                    seen.add(parent)
                    next_frontier.append(parent)
        frontier = next_frontier
    return []
3 changes: 3 additions & 0 deletions api/python/src/cellxgene_ontology_guide/supported_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ def __init__(self, version: Optional[str] = None):
for ontology, info in self.supported_ontologies.items()
for imported_ontology in info.get("additional_ontologies", [])
}
self.cross_ontology_mappings = {
ontology for ontology, info in self.supported_ontologies.items() if info.get("cross_ontology_mapping")
}
self.ontology_file_names: Dict[str, str] = {}
self.deprecated_on = ontology_info[_version].get("deprecated_on")
if self.deprecated_on:
Expand Down
115 changes: 113 additions & 2 deletions api/python/tests/test_ontology_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,31 +72,118 @@ def ontology_dict_with_imports():


@pytest.fixture
def mock_CXGSchema(ontology_dict, ontology_dict_with_imports, mock_load_supported_versions, mock_load_ontology_file):
def ontology_dict_with_cross_ontology_terms():
    """
    Build a small mock ZFA ontology whose terms optionally carry an exact CL match.

    Each entry has an "ancestors" mapping of ancestor term ID -> distance in edges. Entries with an
    exact match additionally have "cross_ontology_terms" ({"CL": <CL term ID>}); entries without
    one omit that key entirely.
    """

    def term(ancestors, cl_match=None):
        # Only terms with an exact match get the "cross_ontology_terms" key.
        entry = {"ancestors": ancestors}
        if cl_match is not None:
            entry["cross_ontology_terms"] = {"CL": cl_match}
        return entry

    return {
        # test cases: terms with exact matches + ancestors of terms without exact matches
        "ZFA:0000000": term({}, "CL:0000000"),
        "ZFA:0000001": term({"ZFA:0000000": 1}, "CL:0000001"),
        "ZFA:0000002": term({"ZFA:0000000": 1}, "CL:0000002"),
        "ZFA:0000003": term({"ZFA:0000000": 1}, "CL:0000003"),
        # test case: term with no exact term and multiple closest terms 1 edge away
        "ZFA:0000004": term(
            {
                "ZFA:0000001": 1,
                "ZFA:0000002": 1,
                "ZFA:0000000": 2,
            }
        ),
        # test case: term with no exact term and 1 closest term, 1 edge away
        "ZFA:0000005": term(
            {
                "ZFA:0000003": 1,
                "ZFA:0000000": 2,
            }
        ),
        # test case: term with no exact term and multiple closest terms 2 edges away
        "ZFA:0000006": term(
            {
                "ZFA:0000004": 1,
                "ZFA:0000005": 1,
                "ZFA:0000001": 2,
                "ZFA:0000002": 2,
                "ZFA:0000003": 2,
                "ZFA:0000000": 3,
            }
        ),
        # test case: term with no exact or closest term
        "ZFA:0000007": term({}),
    }


@pytest.fixture
def mock_CXGSchema(
ontology_dict,
ontology_dict_with_imports,
ontology_dict_with_cross_ontology_terms,
mock_load_supported_versions,
mock_load_ontology_file,
):
mock_load_supported_versions.return_value = {
"5.0.0": {
"ontologies": {
"CL": {"version": "2024-01-01", "source": "http://example.com", "filename": "cl.owl"},
"CL": {
"version": "2024-01-01",
"source": "http://example.com",
"filename": "cl.owl",
"cross_ontology_mapping": "cl.sssom",
},
"HANCESTRO": {
"version": "2024-01-01",
"source": "http://example.com",
"filename": "cl.owl",
"additional_ontologies": ["AfPO"],
},
"ZFA": {
"version": "2024-01-01",
"source": "http://example.com",
"filename": "zfa.owl",
"map_to": ["CL"],
},
}
}
}
cxg_schema = CXGSchema()
cxg_schema.ontology_file_names = {
"CL": "CL-ontology-2024-01-01.json.gz",
"HANCESTRO": "HANCESTRO-ontology-2024-01-01.json.gz",
"ZFA": "ZFA-ontology-2024-01-01.json.gz",
}

def get_mock_ontology_dict(file_name):
if "CL" in file_name:
return ontology_dict
if "HANCESTRO" in file_name:
return ontology_dict_with_imports
if "ZFA" in file_name:
return ontology_dict_with_cross_ontology_terms
return None

mock_load_ontology_file.side_effect = get_mock_ontology_dict
Expand Down Expand Up @@ -584,3 +671,27 @@ def test_get_term_id_by_label(ontology_parser, label, ontology_name, expected):
def test_get_term_id_by_label__unsupported_ontology_name(ontology_parser):
    """Looking up a label in an ontology name the parser does not support must raise ValueError."""
    # "GO" is not among the ontologies configured in the mocked schema (CL, HANCESTRO, ZFA).
    unsupported_ontology_name = "GO"
    with pytest.raises(ValueError):
        ontology_parser.get_term_id_by_label("gene A", unsupported_ontology_name)


@pytest.mark.parametrize(
    "term_id,expected",
    [
        ("ZFA:0000000", "CL:0000000"),  # term carrying an exact CL match
        ("ZFA:0000004", None),  # term without any cross_ontology_terms entry
    ],
)
def test_get_bridge_term_id(ontology_parser, term_id, expected):
    """
    get_bridge_term_id returns the exact CL match of a ZFA term, or None when the term has none.

    NOTE(review): presumably ontology_parser is backed by the ontology_dict_with_cross_ontology_terms
    fixture through mock_CXGSchema - confirm against the ontology_parser fixture.
    """
    bridge_term_id = ontology_parser.get_bridge_term_id(term_id, "CL")
    assert bridge_term_id == expected


def test_get_bridge_term_id__unsupported_cross_ontology(ontology_parser):
    """Requesting a bridge term in an ontology without a cross ontology mapping must raise ValueError."""
    # HANCESTRO has no "cross_ontology_mapping" entry in the mocked supported-versions data.
    unsupported_cross_ontology = "HANCESTRO"
    with pytest.raises(ValueError):
        ontology_parser.get_bridge_term_id("ZFA:0000000", unsupported_cross_ontology)


@pytest.mark.parametrize(
    "term_id,expected",
    [
        # no exact match and no ancestors to fall back on
        ("ZFA:0000007", []),
        # nearest matches are two edges away, via both parents
        ("ZFA:0000006", ["CL:0000001", "CL:0000002", "CL:0000003"]),
        # a single parent carries the match
        ("ZFA:0000005", ["CL:0000003"]),
        # both parents carry a match
        ("ZFA:0000004", ["CL:0000001", "CL:0000002"]),
        # the term itself has an exact match
        ("ZFA:0000000", ["CL:0000000"]),
    ],
)
def test_get_closest_bridge_term_ids(ontology_parser, term_id, expected):
    """
    get_closest_bridge_term_ids returns the exact match if present, else the nearest ancestor matches.

    NOTE(review): the per-case comments assume ontology_parser serves the
    ontology_dict_with_cross_ontology_terms fixture for ZFA terms - confirm against the fixture wiring.
    """
    closest_terms = ontology_parser.get_closest_bridge_term_ids(term_id, "CL")
    assert closest_terms == expected
8 changes: 7 additions & 1 deletion api/python/tests/test_supported_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@ def ontology_info_content():
return {
"5.0.0": {
"ontologies": {
"CL": {"version": "v2024-01-01", "source": "http://example.com", "filename": "cl.owl"},
"CL": {
"version": "v2024-01-01",
"source": "http://example.com",
"filename": "cl.owl",
"cross_ontology_mapping": "cl.sssom",
},
"HANCESTRO": {
"version": "v2024-01-01",
"source": "http://example.com",
Expand Down Expand Up @@ -94,6 +99,7 @@ def test__init__defaults(self, ontology_info_content, initialized_CXGSchemaInfo)
assert initialized_CXGSchemaInfo.version == "5.0.0"
assert initialized_CXGSchemaInfo.supported_ontologies == ontology_info_content["5.0.0"]["ontologies"]
assert initialized_CXGSchemaInfo.imported_ontologies == {"FOO": "HANCESTRO", "OOF": "HANCESTRO"}
assert initialized_CXGSchemaInfo.cross_ontology_mappings == {"CL"}

@pytest.mark.parametrize("version", ["v0.0.1", "0.0.1"])
def test__init__specific_version(self, version, mock_load_supported_versions):
Expand Down
Binary file modified ontology-assets/CL-ontology-v2024-08-16.json.gz
Binary file not shown.
Binary file modified ontology-assets/EFO-ontology-v3.69.0.json.gz
Binary file not shown.
Binary file modified ontology-assets/FBbt-ontology-v2024-10-17.json.gz
Binary file not shown.
Binary file modified ontology-assets/FBdv-ontology-v2024-10-17.json.gz
Binary file not shown.
Binary file modified ontology-assets/HANCESTRO-ontology-3.0.json.gz
Binary file not shown.
Binary file modified ontology-assets/HsapDv-ontology-v2024-05-28.json.gz
Binary file not shown.
Binary file modified ontology-assets/MONDO-ontology-v2024-08-06.json.gz
Binary file not shown.
Binary file modified ontology-assets/MmusDv-ontology-v2024-05-28.json.gz
Binary file not shown.
Binary file modified ontology-assets/NCBITaxon-ontology-v2023-06-20.json.gz
Binary file not shown.
Binary file modified ontology-assets/PATO-ontology-v2023-05-18.json.gz
Binary file not shown.
Binary file modified ontology-assets/UBERON-ontology-v2024-08-07.json.gz
Binary file not shown.
Binary file modified ontology-assets/ZFA-ontology-v2022-12-09.json.gz
Binary file not shown.
5 changes: 2 additions & 3 deletions tools/ontology-builder/src/all_ontology_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,9 +253,8 @@ def _extract_ontology_term_metadata(
# no current use-case for NCBITaxon
term_dict[term_id]["ancestors"] = {} if onto.name == "NCBITaxon" else ancestors

term_dict[term_id]["cross_ontology_terms"] = _extract_cross_ontology_terms(
term_id, map_to_cross_ontologies, cross_ontology_map
)
if cross_ontology_terms := _extract_cross_ontology_terms(term_id, map_to_cross_ontologies, cross_ontology_map):
term_dict[term_id]["cross_ontology_terms"] = cross_ontology_terms

term_dict[term_id]["label"] = onto_term.label[0] if onto_term.label else ""

Expand Down
51 changes: 42 additions & 9 deletions tools/ontology-builder/tests/test_all_ontology_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
import pytest
from all_ontology_generator import (
_download_ontologies,
_extract_cross_ontology_terms,
_extract_ontology_term_metadata,
_load_cross_ontology_map,
_parse_ontologies,
deprecate_previous_cellxgene_schema_versions,
get_ontology_info_file,
Expand All @@ -23,6 +25,7 @@ def mock_ontology_info():
"source": "http://example.com",
"version": "v1",
"filename": "ontology_name.owl",
"cross_ontology_mapping": "ontology_name.sssom.tsv",
}
}

Expand All @@ -43,6 +46,10 @@ def mock_raw_ontology_dir(tmpdir):
sub_dir = tmpdir.mkdir(sub_dir_name)
onto_owl_file = tmpdir.join(sub_dir_name, "ontology_name.owl")
onto_owl_file.write("")
cross_onto_tsv_file = tmpdir.join(sub_dir_name, "ontology_name.sssom.tsv")
cross_onto_tsv_file.write(
"""subject_id\tsubject_label\tpredicate_id\tobject_id\tobject_label\nFOO:000002\tTest Term\tsemapv:crossSpeciesExactMatch\tOOF:000002\ttest match term"""
)
return str(sub_dir)


Expand Down Expand Up @@ -76,20 +83,24 @@ def test_download_ontologies(mock_ontology_info, mock_raw_ontology_dir):
# Call the function
_download_ontologies(ontology_info=mock_ontology_info, output_dir=mock_raw_ontology_dir)

mock_urlretrieve.assert_called_once()
assert mock_urlretrieve.call_count == len(os.listdir(mock_raw_ontology_dir))


def test_parse_ontologies(mock_ontology_info, mock_raw_ontology_dir, tmpdir):
# Mocking _load_ontology_object and _extract_ontology_term_metadata
with (
patch("all_ontology_generator._load_ontology_object") as mock_load_ontology,
patch("all_ontology_generator._load_cross_ontology_map") as mock_load_cross_ontology_map,
patch("all_ontology_generator._extract_ontology_term_metadata") as mock_extract_metadata,
patch("all_ontology_generator._extract_cross_ontology_terms") as mock_extract_cross_ontology_terms,
):
# Mock return values
MockOntologyObject = MagicMock()
MockOntologyObject.name = "ontology_name" # Must match the name of the ontology file
mock_load_ontology.return_value = MockOntologyObject
mock_extract_metadata.return_value = {"term_id": {"label": "Term Label", "deprecated": False, "ancestors": {}}}
mock_load_cross_ontology_map.return_value = {}
mock_extract_cross_ontology_terms.return_value = {}

# Mock output path
output_path = tmpdir.mkdir("output")
Expand All @@ -98,17 +109,23 @@ def test_parse_ontologies(mock_ontology_info, mock_raw_ontology_dir, tmpdir):
ontology_info=mock_ontology_info, working_dir=mock_raw_ontology_dir, output_path=output_path
)

num_cross_ontology_files = 1
num_ontologies = len(os.listdir(mock_raw_ontology_dir)) - num_cross_ontology_files

# Assert the output file is created
assert all(os.path.isfile(file) for file in output_files)

# Assert output_path has the same number of files as mock_raw_ontology_dir.
assert len(os.listdir(output_path)) == len(os.listdir(mock_raw_ontology_dir))
# Assert output_path has the same number of files as mock_raw_ontology_dir, minus the cross_ontology files
assert len(os.listdir(output_path)) == num_ontologies

# Assert _load_ontology_object is called for each ontology file, minus the cross_ontology files
assert mock_load_ontology.call_count == num_ontologies

# Assert _load_ontology_object is called for each ontology file
assert mock_load_ontology.call_count == len(os.listdir(mock_raw_ontology_dir))
# Assert _extract_ontology_term_metadata is called for each ontology object, minus the cross_ontology files
assert mock_extract_metadata.call_count == num_ontologies

# Assert _extract_ontology_term_metadata is called for each ontology object
assert mock_extract_metadata.call_count == len(os.listdir(mock_raw_ontology_dir))
# Assert _load_cross_ontology_map is called once, no matter how many cross_ontology files
assert mock_load_cross_ontology_map.call_count == 1


def test_download_ontologies_http_error(mock_ontology_info, mock_raw_ontology_dir):
Expand Down Expand Up @@ -277,7 +294,9 @@ class FOO_000004(OOF_000002, FOO_000003):

def test_extract_ontology_term_metadata(sample_ontology):
allowed_ontologies = ["FOO"]
result = _extract_ontology_term_metadata(sample_ontology, allowed_ontologies)
result = _extract_ontology_term_metadata(
sample_ontology, allowed_ontologies, map_to_cross_ontologies=[], cross_ontology_map={}
)

expected_result = {
"FOO:000001": {
Expand Down Expand Up @@ -312,7 +331,9 @@ def test_extract_ontology_term_metadata(sample_ontology):

def test_extract_ontology_term_metadata_multiple_allowed_ontologies(sample_ontology):
allowed_ontologies = ["FOO", "OOF"]
result = _extract_ontology_term_metadata(sample_ontology, allowed_ontologies)
result = _extract_ontology_term_metadata(
sample_ontology, allowed_ontologies, map_to_cross_ontologies=[], cross_ontology_map={}
)

expected_result = {
"FOO:000001": {
Expand Down Expand Up @@ -353,3 +374,15 @@ def test_extract_ontology_term_metadata_multiple_allowed_ontologies(sample_ontol
}

assert result == expected_result


def test_extract_cross_ontology_terms(mock_raw_ontology_dir, mock_ontology_info):
    """
    Load the mock SSSOM mapping from disk, then extract the cross ontology terms for one term.

    The mock TSV holds a single row with subject FOO:000002 and object OOF:000002; the loaded map is
    expected to be keyed by ontology name and then by the object ID.
    """
    loaded_map = _load_cross_ontology_map(mock_raw_ontology_dir, mock_ontology_info)
    assert loaded_map == {"ontology_name": {"OOF:000002": "FOO:000002"}}

    # Extraction for the object term should surface the subject term under its ontology name.
    extracted_terms = _extract_cross_ontology_terms("OOF:000002", ["ontology_name"], loaded_map)
    assert extracted_terms == {"ontology_name": "FOO:000002"}

0 comments on commit c808bd5

Please sign in to comment.