Skip to content

Commit 9e75445

Browse files
authored
Merge pull request #77 from linkml/enrich
Schema enricher and Docker config
2 parents be6d7f0 + c0295e4 commit 9e75445

File tree

10 files changed

+190
-10
lines changed

10 files changed

+190
-10
lines changed

Dockerfile

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# set base image (host OS)
FROM python:3.9

# Build-time knob: `docker build --build-arg YOUR_ENV=<value> .`
# It has to be declared as an ARG for ${YOUR_ENV} in the ENV instruction below
# to pick up a value; without the declaration it always expands to empty.
ARG YOUR_ENV

# https://stackoverflow.com/questions/53835198/integrating-python-poetry-with-docker
ENV YOUR_ENV=${YOUR_ENV} \
    PYTHONFAULTHANDLER=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONHASHSEED=random \
    PIP_NO_CACHE_DIR=off \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100 \
    POETRY_VERSION=1.1.13

# System deps:
RUN pip install "poetry==$POETRY_VERSION"

# set the working directory in the container
WORKDIR /work

# Install the released package from PyPI rather than the checked-out sources.
RUN pip install schema-automator

#COPY poetry.lock pyproject.toml /code/

# Project initialization:
#RUN poetry install


# command to run on container start
CMD [ "bash" ]

Makefile

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
# Newest tag in version order; plain `git tag` sorts lexicographically, which
# places v0.10.0 before v0.9.0. Simply expanded (:=) so git runs once at parse
# time instead of on every $(VERSION) reference.
VERSION := $(shell git tag --sort=version:refname | tail -1)
2+
13
.PHONY: all clean test
24

35
all: clean test target/soil_meanings.yaml
46

7+
58
clean:
69
rm -rf target/soil_meanings.yaml
710
rm -rf target/soil_meanings_generated.yaml
@@ -63,3 +66,41 @@ target/availabilities_g_s_strain_202112151116_org_meanings_curated.yaml: target/
6366
# this can be used outside the poetry environment
# Writes a small sh wrapper that forwards all arguments to the schemauto
# executable inside the poetry environment. The wrapper is only written when
# `poetry run which schemauto` succeeds, so a failed lookup cannot leave a
# broken script behind.
bin/schemauto:
	mkdir -p $(@D)
	exe=$$(poetry run which schemauto) \
	&& printf '#!/bin/sh\nexec "%s" "$$@"\n' "$$exe" > $@ \
	&& chmod +x $@
69+
70+
71+
################################################
#### Commands for building the Docker image ####
################################################

# Image repository used for release builds.
IM=linkml/schema-automator

# Image repository used by the *-dev targets. Nothing in the visible part of
# this Makefile assigns it, so it defaults to empty and must be supplied by the
# caller, e.g. `make DEV=<org>/<image> docker-build-use-cache-dev`.
DEV ?=

# Expanded as the first line of each *-dev recipe: stops with a clear message
# instead of handing docker an empty repository name (":<version>").
require-dev = $(if $(strip $(DEV)),,$(error DEV is not set; pass DEV=<org>/<image> when running $@))

# None of these targets produce a file of the same name.
.PHONY: docker-build-no-cache docker-build docker-build-use-cache-dev \
        docker-clean docker-publish-no-build docker-publish-dev-no-build \
        docker-publish docker-run

# Build the release image from scratch and also tag it as latest.
docker-build-no-cache:
	@docker build --no-cache -t $(IM):$(VERSION) . \
	&& docker tag $(IM):$(VERSION) $(IM):latest

# Build the release image (layer cache allowed) and also tag it as latest.
docker-build:
	@docker build -t $(IM):$(VERSION) . \
	&& docker tag $(IM):$(VERSION) $(IM):latest

# Same as docker-build, but for the development image repository.
docker-build-use-cache-dev:
	$(require-dev)
	@docker build -t $(DEV):$(VERSION) . \
	&& docker tag $(DEV):$(VERSION) $(DEV):latest

# NOTE(review): $(IM) is an image repository, whereas `docker kill` and
# `docker rm` operate on containers, and docker-run does not name its
# container. Confirm which container this is meant to remove.
docker-clean:
	docker kill $(IM) || echo not running ;
	docker rm $(IM) || echo not made

# Push the already-built release image (version tag and latest).
docker-publish-no-build:
	@docker push $(IM):$(VERSION) \
	&& docker push $(IM):latest

# Push the already-built development image (version tag and latest).
docker-publish-dev-no-build:
	$(require-dev)
	@docker push $(DEV):$(VERSION) \
	&& docker push $(DEV):latest

# Build, then push, the release image.
docker-publish: docker-build
	@docker push $(IM):$(VERSION) \
	&& docker push $(IM):latest

# Open an interactive container with the Makefile's directory mounted at /work.
# $(CURDIR) is used instead of $(PWD) so the mount is correct under `make -C`.
docker-run:
	@docker run -v "$(CURDIR)":/work -w /work -ti $(IM):$(VERSION)

docs/conf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
# -- Project information -----------------------------------------------------
2121

2222
project = 'Schema Automator'
23-
copyright = '2022, Chris Mungall'
24-
author = 'Chris Mungall, Harshad Hegde'
23+
copyright = '2022, LinkML Developers'
24+
author = 'Chris Mungall, Harshad Hegde, Mark Miller'
2525

2626
# The full version, including alpha/beta/rc tags
2727
# release = '0.1.4'

docs/index.rst

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,29 @@
11
LinkML Schema Automator
22
============================================
33

4-
Schema Automator is a toolkit for bootstrapping and automatically enhancing LinkML schemas from a variety of sources
4+
Schema Automator is a toolkit for bootstrapping and automatically enhancing schemas from a variety of sources.
5+
6+
Use cases include:
7+
8+
1. Inferring an initial schema or data dictionary from a dataset that is a collection of TSVs
9+
2. Automatically annotating schema elements and enumerations using the BioPortal annotator
10+
3. Importing from a language like RDFS/OWL
11+
12+
The primary output of Schema Automator is a `LinkML Schema <https://linkml.io/linkml>`_. This can be converted to other
13+
schema frameworks, including:
14+
15+
* JSON-Schema
16+
* SQL DDL
17+
* SHACL
18+
* ShEx
19+
* RDFS/OWL
20+
* Python dataclasses or Pydantic
521

622
.. toctree::
723
:maxdepth: 3
824
:caption: Contents:
925

26+
index
1027
introduction
1128
install
1229
cli

docs/install.rst

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
Installation
2+
============
3+
4+
Direct Installation
25
-------------------
36

47
``schema-automator`` and its components require Python 3.9 or greater.
@@ -13,3 +16,19 @@ To check this works:
1316
1417
schemauto --help
1518
19+
Running via Docker
20+
------------------
21+
22+
You can use the `Schema Automator Docker Container <https://hub.docker.com/r/linkml/schema-automator>`_.
23+
24+
To start a shell:
25+
26+
.. code:: bash
27+
28+
docker run -v $PWD:/work -w /work -ti linkml/schema-automator
29+
30+
Within the shell you should see all your files, and you should have access to the ``schemauto`` command:
31+
32+
.. code:: bash
33+
34+
schemauto --help

docs/introduction.rst

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
LinkML Schema Automator
1+
.. _introduction:
2+
3+
Introduction
24
=======================
35

46
This is a toolkit that assists with generating and enhancing schemas and data models from a variety
@@ -17,17 +19,17 @@ See :ref:`generalizers`
1719

1820
Generalizers allow you to *bootstrap* a schema by generalizing from existing data files
1921

20-
- TSVs and spreadsheets
21-
- SQLite databases
22-
- RDF instance graphs
22+
* TSVs and spreadsheets
23+
* SQLite databases
24+
* RDF instance graphs
2325

24-
Importing from alternative modeling framework
26+
Importing from alternative modeling frameworks
2527
---------------------------------
2628

2729
See :ref:`importers`
2830

29-
- OWL (but this only works for schema-style OWL)
30-
- JSON-Schema
31+
* OWL (but this only works for schema-style OWL)
32+
* JSON-Schema
3133

3234
In the future, other frameworks will be supported.
3335

schema_automator/annotators/schema_annotator.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,25 @@ def annotate_schema(self, schema: Union[SchemaDefinition, str], curie_only=True)
9090

9191
return sv.schema
9292

93+
def enrich(self, schema: Union[SchemaDefinition, str]) -> SchemaDefinition:
    """
    Fill in missing element descriptions from ontology term definitions.

    For every schema element that has no description, the element's own URI
    and the CURIEs listed in its mappings are looked up, in that order, in
    ``self.ontology_implementation``. The first definition found becomes the
    element's description. Elements that already have a description are left
    untouched.

    :param schema: schema object, or anything else ``SchemaView`` accepts
    :return: the schema held by the view, with descriptions filled in
    """
    import logging

    sv = SchemaView(schema)
    oi = self.ontology_implementation
    for elt_name, elt in sv.all_elements().items():
        if elt.description:
            continue
        candidates = [sv.get_uri(elt)]
        # The element name must be passed; without it no mappings are returned.
        for mapped in sv.get_mappings(elt_name).values():
            candidates.extend(mapped)
        # Drop empty entries and duplicates while keeping lookup order.
        for curie in dict.fromkeys(c for c in candidates if c):
            try:
                defn = oi.get_definition_by_curie(curie)
            except Exception as e:
                # Best effort: a CURIE the ontology cannot resolve is not fatal.
                logging.debug(f"No definition lookup possible for {curie}: {e}")
                continue
            if defn:
                elt.description = defn
                break
    return sv.schema
111+
93112

94113
@click.command()
95114
@click.argument('schema')

schema_automator/cli.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,23 @@ def annotate_schema(schema: str, input: str, output: str, curie_only: bool, **ar
245245
write_schema(schema, output)
246246

247247

248+
@main.command()
@click.argument('schema')
@click.option('--input', '-i', help="OAK input ontology selector")
@output_option
def enrich_schema(schema: str, input: str, output: str, **args):
    """
    Enrich a schema with descriptions taken from an ontology

    Elements without a description are filled in from ontology term
    definitions. The ontology is chosen with --input, which accepts an OAK
    selector (for example the path to a local OBO file).
    """
    # Configure logging first: basicConfig does nothing once the root logger
    # already has a handler, which can happen if anything logs while the
    # ontology implementation is being created.
    logging.basicConfig(level=logging.INFO)
    impl = get_implementation_from_shorthand(input)
    annr = SchemaAnnotator(impl)
    schema = annr.enrich(schema)
    write_schema(schema, output)
263+
264+
248265
@main.command()
249266
@click.argument('schema')
250267
@output_option

tests/resources/so-mini.obo

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
[Term]
2+
id: SO:0000704
3+
name: gene
4+
def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." []
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import logging
4+
import os
5+
import unittest
6+
from linkml.utils.schema_builder import SchemaBuilder
7+
from linkml_runtime.dumpers import yaml_dumper
8+
from linkml_runtime.linkml_model import SchemaDefinition, EnumDefinition, PermissibleValue
9+
from oaklib.implementations import BioportalImplementation
10+
from oaklib.selector import get_implementation_from_shorthand
11+
12+
from schema_automator.annotators.schema_annotator import SchemaAnnotator
13+
from linkml.generators.yamlgen import YAMLGenerator
14+
from tests import INPUT_DIR, OUTPUT_DIR
15+
16+
17+
class SchemaEnricherTestCase(unittest.TestCase):
    """Exercises SchemaAnnotator.enrich against the local so-mini.obo ontology."""

    def setUp(self) -> None:
        # A local OBO file keeps the test independent of any remote service.
        impl = get_implementation_from_shorthand(os.path.join(INPUT_DIR, "so-mini.obo"))
        self.annotator = SchemaAnnotator(impl)

    def test_enrich(self):
        """A class whose class_uri is SO:0000704 receives that term's definition."""
        s = SchemaDefinition(id='test', name='test')
        sb = SchemaBuilder(s)
        sb.add_class('Gene', class_uri="SO:0000704").add_slot('part_of')
        s = self.annotator.enrich(sb.schema)
        description = s.classes['Gene'].description
        # Check for a missing description first so the failure is readable
        # rather than an AttributeError on None.
        self.assertIsNotNone(description, "enrich() did not set a description on Gene")
        self.assertTrue(description.startswith("A region"))
30+
31+
32+

0 commit comments

Comments
 (0)