Skip to content

Commit

Permalink
Merge pull request #290 from geneontology/json-export
Browse files Browse the repository at this point in the history
Json Export
  • Loading branch information
tmushayahama authored Nov 30, 2022
2 parents 794461f + aad573f commit ab1dd2c
Show file tree
Hide file tree
Showing 19 changed files with 9,824 additions and 582 deletions.
60 changes: 60 additions & 0 deletions .github/workflows/gen-project-linkml.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Built from:
# https://docs.github.com/en/actions/guides/building-and-testing-python
# https://github.com/snok/install-poetry#workflows-and-tips

name: Build and test LinkML output of GO domain/range constraints as JSON

on: [pull_request]

jobs:
  test:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.9", "3.10"]

    steps:

      #----------------------------------------------
      #       check-out repo and set-up python
      #----------------------------------------------
      - name: Check out repository
        uses: actions/checkout@v2

      # The matrix value must be referenced with a plain Actions expression;
      # the template-escaped form ${{ "{{" }} ... {{ "}}" }} is not a valid
      # expression and is never expanded by the runner.
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}

      #----------------------------------------------
      #          install & configure poetry
      #----------------------------------------------
      - name: Install Poetry
        uses: snok/install-poetry@v1.3

      #----------------------------------------------
      #   install dependencies (without the project)
      #----------------------------------------------
      - name: Install dependencies
        run: poetry install --no-interaction --no-root

      #----------------------------------------------
      #    install your root project, if required
      #----------------------------------------------
      - name: Install library
        run: poetry install --no-interaction

      #----------------------------------------------
      #          regenerate LinkML artifacts
      #----------------------------------------------
      - name: Regenerate LinkML artifacts
        # Run steps use `bash -e` by default, so a failing command aborts the
        # script before a later `$?` test could run. Testing `make` directly
        # in the `if` keeps the failure message reachable.
        run: |
          make clean-artifacts
          if make -B gen-artifacts; then
            echo "LinkML artifacts generated successfully"
          else
            echo "LinkML artifacts generation failed"
            exit 1
          fi
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ python/tests/__pycache__/
java/.idea/
.DS_Store

.venv/
*.log
scala/target/*
scala/test/*
Expand Down
15 changes: 15 additions & 0 deletions python/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,19 @@ p-%: tests/data/p-%.ttl
# Negative test: the validator is expected to reject tests/data/f-*.ttl.
# If validation succeeds the recipe exits non-zero; if it fails, the recipe
# reports the expected failure. `exit 1` is used because POSIX `exit` only
# defines unsigned status values (dash rejects `exit -1`).
f-%: tests/data/f-%.ttl
	python ./gocam_validator.py $< && exit 1 || echo FAILED AS EXPECTED

# LinkML generators: each target regenerates one artifact from
# schema/shex_json_linkml.yaml. They are commands, not files, so they are
# declared phony to make sure they always run.
.PHONY: gen-python gen-jsonschema gen-typescript linkml

gen-python:
	rm -f shex_json_linkml.py && gen-python schema/shex_json_linkml.yaml > shex_json_linkml.py

#gen-pydantic:
#	rm -f shex_json_linkml.py && gen-pydantic schema/shex_json_linkml.yaml > shex_json_linkml.py

# The output directory must exist before the shell redirect can create the file.
gen-jsonschema:
	mkdir -p target/jsonschema
	rm -f target/jsonschema/shex_json_linkml.json && gen-json-schema schema/shex_json_linkml.yaml > target/jsonschema/shex_json_linkml.json

gen-typescript:
	mkdir -p target/typescript
	rm -f target/typescript/shex_json_linkml.ts && gen-typescript schema/shex_json_linkml.yaml > target/typescript/shex_json_linkml.ts

# Regenerate every LinkML artifact.
linkml: gen-python gen-jsonschema gen-typescript


.PRECIOUS: tests/data/%.ttl
167 changes: 167 additions & 0 deletions python/json_export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
from os import path
import json
from ontobio.rdfgen.assoc_rdfgen import prefix_context
from prefixcommons.curie_util import contract_uri
from pyshexc.parser_impl import generate_shexj
from typing import Optional, List, Union
from ShExJSG.ShExJ import Shape, ShapeAnd, ShapeOr, ShapeNot, TripleConstraint, shapeExpr, \
shapeExprLabel, tripleExpr, tripleExprLabel, OneOf, EachOf, Annotation
from pyshex import PrefixLibrary
import requests
from shex_json_linkml import Association, AssociationCollection
from linkml_runtime.dumpers import JSONDumper
from linkml_runtime.loaders import JSONLoader
from pathlib import Path
import os

# Relative path of the JSON dump of the parsed shapes.
OUT_JSON = '../shapes/json/shex_dump.json'


def get_suffix(uri):
    """Return a short identifier for *uri*.

    The first contraction that ``contract_uri`` produces with
    ``prefix_context`` is used; when it produces none, the last path
    segment of the URI is returned instead.
    """
    curies = contract_uri(uri, cmaps=[prefix_context])
    return curies[0] if curies else path.basename(uri)


class NoctuaFormShex:
    """Flatten a ShEx schema into a list of ``Association`` records.

    The schema text is parsed into ShExJ on construction. ``parse()`` then
    walks every named shape and appends one ``Association`` per accepted
    triple constraint to ``self.json_shapes``.
    """

    def __init__(self, shex_text):
        """Parse *shex_text* and build the prefix -> identifier table."""
        # Annotation predicate that flags a constraint as excluded from extensions.
        self.exclude_ext_pred = 'http://purl.obolibrary.org/obo/go/shapes/exclude_from_extensions'
        self.json_shapes = []

        self.shex = generate_shexj.parse(shex_text)
        pref = PrefixLibrary(shex_text)
        self.pref_dict = {
            k: get_suffix(str(v)) for (k, v) in dict(pref).items()
            if str(v).startswith('http://purl.obolibrary.org/obo/')}
        # TODO: remove this filter and make sure that it works, because it
        # needs to be working for every shape.
        # pop() rather than del: a schema without an OBO prefix must not
        # crash the constructor with a KeyError.
        self.pref_dict.pop('OBO', None)

    def get_shape_name(self, uri, clean=False):
        """Map a shape URI to its identifier from the prefix table.

        The lookup key is the upper-cased last path segment of *uri*,
        prefixed with ``GO`` when the URI contains ``/go/``. When the key is
        unknown, return ``None`` if *clean* is true, otherwise *uri* itself.
        """
        name = path.basename(uri).upper()
        if '/go/' in uri:
            name = 'GO' + name
        return self.pref_dict.get(name, None if clean else uri)

    def gen_lookup_table(self):
        """Fetch id/label/definition/comment/synonyms for every known term.

        Performs one HTTP GET per entry of the prefix table.

        Raises:
            requests.RequestException: on timeout, connection failure or a
                non-success HTTP status.
        """
        goApi = 'http://api.geneontology.org/api/ontology/term/'
        table = []
        for term_id in self.pref_dict.values():
            # A timeout keeps a stalled connection from hanging the export.
            resp = requests.get(goApi + term_id, timeout=30)
            # Fail with the HTTP error instead of a confusing KeyError below.
            resp.raise_for_status()
            term = resp.json()
            table.append({
                'id': term['goid'],
                'label': term['label'],
                'definition': term.get('definition', ""),
                'comment': term.get('comment', ""),
                'synonyms': term.get('synonyms', "")
            })
        return table

    def _load_expr(self, subject: str, expr: Optional[Union[shapeExprLabel, shapeExpr]],
                   preds=None) -> Union[List, dict]:
        """Walk a shape expression, recording associations for *subject*.

        When *preds* is a list, every shape reference (a plain string)
        reached during the walk is appended to it as a resolved shape name.
        When *preds* is omitted, a dict is used as the accumulator, so shape
        references are not collected at all.

        Returns:
            The *preds* accumulator that was passed in or created.
        """
        if preds is None:
            # NOTE(review): a dict, not a list, so the isinstance check below
            # skips collection for top-level calls; confirm this is intended.
            preds = {}
        if isinstance(expr, str) and isinstance(preds, list):
            preds.append(self.get_shape_name(expr))
        if isinstance(expr, (ShapeOr, ShapeAnd)):
            for expr2 in expr.shapeExprs:
                self._load_expr(subject, expr2, preds)
        elif isinstance(expr, ShapeNot):
            self._load_expr(subject, expr.shapeExpr, preds)
        elif isinstance(expr, Shape) and expr.expression is not None:
            self._load_triple_expr(subject, expr.expression, preds)

        # TODO: throw an error here if pred list is empty
        return preds

    def _load_triple_expr(self, subject: str, expr: Union[tripleExpr, tripleExprLabel],
                          preds=None) -> Optional[Union[List, dict]]:
        """Walk a triple expression and append an ``Association`` per constraint.

        ``OneOf`` / ``EachOf`` groups are descended recursively. A
        ``TripleConstraint`` with a value expression yields one association,
        unless its predicate is not among the values of the prefix table.

        Returns:
            *preds*, unchanged.
        """
        if isinstance(expr, (OneOf, EachOf)):
            for expr2 in expr.expressions:
                self._load_triple_expr(subject, expr2, preds)
        elif isinstance(expr, TripleConstraint) and expr.valueExpr is not None:
            predicate = get_suffix(expr.predicate)

            if predicate not in self.pref_dict.values():
                return preds

            # Shape names allowed as the object of this predicate.
            objects = []
            self._load_expr(subject, expr.valueExpr, objects)

            # None means "no annotation list present": leave the field unset.
            exclude_from_extensions = None
            if isinstance(expr.annotations, list):
                exclude_from_extensions = self._load_annotation(
                    expr, self.exclude_ext_pred)

            # A max cardinality of -1 marks the constraint as multivalued.
            is_multivalued = expr.max is not None and expr.max == -1

            goshape = Association(
                subject=subject,
                object=objects,
                predicate=predicate,
                is_multivalued=is_multivalued,
                is_required=False,
                context=""
            )
            if exclude_from_extensions is not None:
                # Assign the bool itself; a trailing comma here would store a
                # one-element tuple instead.
                goshape.exclude_from_extensions = exclude_from_extensions
            self.json_shapes.append(goshape)

        return preds

    def _load_annotation(self, expr: Union[tripleExpr, tripleExprLabel], annotation_key):
        """Return whether *expr* carries *annotation_key* with the value "true".

        Only the first annotation whose predicate equals *annotation_key* is
        consulted; ``False`` is returned when there is none.
        """
        for annotation in expr.annotations:
            if isinstance(annotation, Annotation) and annotation.predicate == annotation_key:
                return annotation.object.value == "true"

        return False

    def parse_raw(self):
        """Return the whole parsed schema as plain JSON-compatible data."""
        return json.loads(self.shex._as_json_dumps())

    def parse(self):
        """Populate ``self.json_shapes`` from every shape with a known name."""
        shapes = self.shex.shapes

        for shape in shapes:
            shape_name = self.get_shape_name(shape['id'], True)

            # Shapes whose id is not in the prefix table are skipped.
            if shape_name is None:
                continue

            print('Parsing Shape: ' + shape['id'])

            shexps = shape.shapeExprs or []

            for expr in shexps:
                self._load_expr(shape_name, expr)


if __name__ == "__main__":
    # All paths are resolved against this file's directory, so the script
    # behaves the same regardless of the current working directory.
    base_path = Path(__file__).parent
    shex_fp = (base_path / "../shapes/go-cam-shapes.shex").resolve()
    json_shapes_fp = (base_path / "../shapes/json/shex_dump.json").resolve()
    look_table_fp = (base_path / "../shapes/json/look_table.json").resolve()
    shex_full_fp = (base_path / "../shapes/json/shex_full.json").resolve()

    with open(shex_fp) as f:
        shex_text = f.read()

    nfShex = NoctuaFormShex(shex_text)
    nfShex.parse()

    # Both outputs live in the same directory; create it if it is missing.
    json_shapes_fp.parent.mkdir(parents=True, exist_ok=True)

    # JSONDumper.dump opens the target itself, so no file handle is opened
    # here. The resolved path is passed so the dump lands next to the other
    # outputs rather than at a path relative to the working directory.
    coll = AssociationCollection(goshapes=nfShex.json_shapes)
    JSONDumper().dump(coll, to_file=str(json_shapes_fp))

    # Lookup-table export is currently disabled (it issues one HTTP request
    # per term):
    # with open(look_table_fp, "w") as sf:
    #     json.dump(nfShex.gen_lookup_table(), sf, indent=2)

    with open(shex_full_fp, "w") as sf:
        json.dump(nfShex.parse_raw(), sf, indent=2)
Loading

0 comments on commit ab1dd2c

Please sign in to comment.