Skip to content

Commit

Permalink
Merge pull request #475 from biolink/issue-470-meta-property-propagation
Browse files Browse the repository at this point in the history
Issue#470 - fix propagation of 'deprecated' metadata flag
  • Loading branch information
RichardBruskiewich authored Nov 7, 2023
2 parents fca4aa5 + c132302 commit ada9516
Show file tree
Hide file tree
Showing 3 changed files with 148 additions and 7 deletions.
20 changes: 15 additions & 5 deletions kgx/source/obograph_source.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import gzip
import tarfile
import typing
from itertools import chain
from typing import Optional, Tuple, Dict, Generator, Any
Expand Down Expand Up @@ -104,6 +103,9 @@ def read_node(self, node: Dict) -> Optional[Tuple[str, Dict]]:
curie = self.prefix_manager.contract(node["id"])
node_properties = {}
if "meta" in node:
# Returns a dictionary that contains 'description', 'subsets',
# 'synonym', 'xrefs', a 'deprecated' flag and/or
# 'equivalent_nodes', if the corresponding key values are set
node_properties = self.parse_meta(node["id"], node["meta"])

fixed_node = dict()
Expand All @@ -114,22 +116,30 @@ def read_node(self, node: Dict) -> Optional[Tuple[str, Dict]]:

if "description" in node_properties:
fixed_node["description"] = node_properties["description"]

if "subsets" in node_properties:
fixed_node["subsets"] = node_properties["subsets"]

if "synonym" in node_properties:
fixed_node["synonym"] = node_properties["synonym"]

if "xrefs" in node_properties:
fixed_node["xref"] = node_properties["xrefs"]
if "subsets" in node_properties:
fixed_node["subsets"] = node_properties["subsets"]

if "deprecated" in node_properties:
fixed_node["deprecated"] = node_properties["deprecated"]

if "category" not in node:
category = self.get_category(curie, node)
if category:
fixed_node["category"] = [category]
else:
fixed_node["category"] = ["biolink:OntologyClass"]

if "equivalent_nodes" in node_properties:
equivalent_nodes = node_properties["equivalent_nodes"]
fixed_node["same_as"] = equivalent_nodes

return super().read_node(fixed_node)

def read_edges(self, filename: str, compression: Optional[str] = None) -> Generator:
Expand Down Expand Up @@ -301,8 +311,8 @@ def parse_meta(self, node: str, meta: Dict) -> Dict:
Returns
-------
Dict
A dictionary that contains 'description', 'synonyms',
'xrefs', and 'equivalent_nodes'.
A dictionary that contains 'description', 'subsets',
'synonyms', 'xrefs', a 'deprecated' flag and/or 'equivalent_nodes'.
"""
# cross species links are in meta; this needs to be parsed properly too
Expand Down
53 changes: 53 additions & 0 deletions tests/resources/phenio.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{
"graphs": [
{
"nodes": [
{
"id": "http://purl.obolibrary.org/obo/GO_0051370",
"lbl": "obsolete ZASP binding",
"type": "INDIVIDUAL",
"meta": {
"definition": {
"val": "OBSOLETE. Binding to Z-band alternatively spliced PDZ motif protein (ZASP). ZASP is a Z-band protein specifically expressed in heart and skeletal muscle. This protein contains N-terminal PDZ domain and C-terminal LIM domain.",
"xrefs": [
"PMID:10427098",
"PMID:11699871"
]
},
"comments": [
"This term was made obsolete because it represents binding to an individual protein."
],
"synonyms": [
{
"pred": "hasExactSynonym",
"val": "Z-band alternatively spliced PDZ-motif protein binding"
},
{
"pred": "hasExactSynonym",
"val": "ZASP binding"
}
],
"basicPropertyValues": [
{
"pred": "http://purl.obolibrary.org/obo/IAO_0100001",
"val": "GO:0008092"
},
{
"pred": "http://www.geneontology.org/formats/oboInOwl#hasOBONamespace",
"val": "molecular_function"
}
],
"deprecated": true
}
}
],
"edges": [],
"id": "http://purl.obolibrary.org/obo/phenio.owl",
"meta": {
"subsets": [],
"xrefs": [],
"basicPropertyValues": []
}
}
]
}
82 changes: 80 additions & 2 deletions tests/unit/test_source/test_obograph_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

import pytest

from kgx.source import ObographSource
from kgx.cli import transform
from kgx.source import ObographSource, TsvSource
from kgx.transformer import Transformer
from tests import RESOURCE_DIR
from tests import RESOURCE_DIR, TARGET_DIR


def test_read_obograph1():
Expand Down Expand Up @@ -107,6 +108,52 @@ def test_read_jsonl2():
assert "GO slim generic" in n2["provided_by"]


def test_read_deprecated_term():
    """
    Parse the PATO obograph JSON with ObographSource and verify
    that the "deprecated" status of an obsolete term is captured.
    """
    transformer = Transformer()
    source = ObographSource(transformer)
    pato_path = os.path.join(RESOURCE_DIR, "pato.json")
    records = source.parse(
        pato_path,
        knowledge_source="Phenotype and Trait Ontology",
    )

    # Keep only non-empty records that are not of length 4
    # (length-4 records are presumably edges -- confirm against the source API).
    nodes = {rec[0]: rec[1] for rec in records if rec and len(rec) != 4}

    obsolete_term = nodes["PATO:0000000"]
    assert obsolete_term["id"] == "PATO:0000000"
    assert obsolete_term["name"] == "obsolete pato"
    assert obsolete_term["deprecated"] is True


def test_read_deprecated_term_phenio():
    """
    Parse the Phenio obograph JSON with ObographSource and verify
    that the "deprecated" status of an obsolete term is captured.
    """
    transformer = Transformer()
    source = ObographSource(transformer)
    phenio_path = os.path.join(RESOURCE_DIR, "phenio.json")
    records = source.parse(
        phenio_path,
        knowledge_source="Phenomics Integrative Ontology",
    )

    # Keep only non-empty records that are not of length 4
    # (length-4 records are presumably edges -- confirm against the source API).
    nodes = {rec[0]: rec[1] for rec in records if rec and len(rec) != 4}

    obsolete_term = nodes["GO:0051370"]
    assert obsolete_term["id"] == "GO:0051370"
    assert obsolete_term["name"] == "obsolete ZASP binding"
    assert obsolete_term["deprecated"] is True


@pytest.mark.parametrize(
"query",
[
Expand Down Expand Up @@ -205,3 +252,34 @@ def test_error_detection():
t.write_report(None, "Error")
if len(t.get_errors("Warning")) > 0:
t.write_report(None, "Warning")


def test_phenio_obojson_to_tsv():
    """
    Check that node properties (mainly the node 'deprecated' status)
    survive a transform of the Phenio JSON into the TSV file format.
    """
    source_json = os.path.join(RESOURCE_DIR, "phenio.json")
    output_stem = os.path.join(TARGET_DIR, "phenio")
    transform(
        inputs=[source_json],
        input_format="obojson",
        output=output_stem,
        output_format="tsv",
        stream=False,
    )

    # Read back the nodes file that the transform above is expected to write.
    tsv_transformer = Transformer()
    tsv_source = TsvSource(tsv_transformer)
    nodes_tsv = os.path.join(TARGET_DIR, "phenio_nodes.tsv")
    records = tsv_source.parse(filename=nodes_tsv, format="tsv")

    # Keep only non-empty records that are not of length 4
    # (length-4 records are presumably edges -- confirm against the source API).
    nodes = {rec[0]: rec[1] for rec in records if rec and len(rec) != 4}

    obsolete_term = nodes["GO:0051370"]
    assert obsolete_term["id"] == "GO:0051370"
    assert obsolete_term["name"] == "obsolete ZASP binding"
    assert obsolete_term["deprecated"] is True

0 comments on commit ada9516

Please sign in to comment.