Skip to content

Commit

Permalink
Merge branch 'master' into 979-dlite-validate-overwrites-file
Browse files Browse the repository at this point in the history
  • Loading branch information
jesper-friis authored Oct 28, 2024
2 parents 3c5b097 + 33c841f commit e325b2b
Show file tree
Hide file tree
Showing 13 changed files with 163 additions and 90 deletions.
11 changes: 11 additions & 0 deletions bindings/python/tests/input/test_ref_type_middle.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
http://onto-ns.com/meta/0.2/Middle:
description: Middle-level nested data structure.
dimensions: []
properties:
- name: name
type: string
description: Value of this structure.
- name: leaf
type: ref
$ref: http://onto-ns.com/meta/0.1/Leaf
description: Reference to low-level structure.
21 changes: 16 additions & 5 deletions bindings/python/tests/test_ref_type.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,24 @@
from pathlib import Path

import dlite
from dlite.testutils import importcheck

yaml = importcheck("yaml")


thisdir = Path(__file__).resolve().parent
indir = thisdir / "input"

dlite.storage_path.append(indir / "test_ref_type.json")
dlite.storage_path.append(indir)
dlite.storage_path.append(indir / "test_ref_type_middle.yaml")

# If yaml is available, we read Middle v0.2, which is defined in
# `test_ref_type_middle.yaml`. Otherwise, we read Middle v0.1, which
# is defined together with the other datamodels in `test_ref_type.json`.
version = "0.2" if yaml else "0.1"

Top = dlite.get_instance("http://onto-ns.com/meta/0.1/Top")
Middle = dlite.get_instance("http://onto-ns.com/meta/0.1/Middle")
Middle = dlite.get_instance(f"http://onto-ns.com/meta/{version}/Middle")
Leaf = dlite.get_instance("http://onto-ns.com/meta/0.1/Leaf")
Linked = dlite.get_instance("http://onto-ns.com/meta/0.1/Linked")
Tree = dlite.get_instance("http://onto-ns.com/meta/0.1/Tree")
Expand Down Expand Up @@ -78,6 +87,8 @@
assert cyclic.subtree[0].subtree[0] == cyclic
assert cyclic.subtree[0].subtree[0].subtree[0] == cyclic

# Instantiate nested from dict
# For issue #515
# middle = Middle(properties={"name": "nested", "leaf": {"a": 1, "b": True}})
# For issue #982: ref-type in yaml
assert Middle.getprop("leaf").ref == "http://onto-ns.com/meta/0.1/Leaf"

# For issue #515: Instantiate nested from dict
#middle = Middle(properties={"name": "nested", "leaf": {"a": 1, "b": True}})
12 changes: 11 additions & 1 deletion bindings/python/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,16 @@ def instance_from_dict(d, id=None, single=None, check_storages=True):
if meta.is_metameta:
if "uri" in d:
uri = d["uri"]
else:
elif "identity" in d:
uri = d["identity"]
elif "name" in d and "version" in d and "namespace" in d:
uri = dlite.join_meta_uri(d["name"], d["version"], d["namespace"])
elif id and dlite.urlparse(id).scheme:
uri = id
else:
raise TypeError(
"`id` required for metadata when the URI is not in the dict"
)

if check_storages:
try:
Expand Down Expand Up @@ -169,6 +177,7 @@ def instance_from_dict(d, id=None, single=None, check_storages=True):
dlite.Property(
name=p["name"],
type=p["type"],
ref=p.get("$ref", p.get("ref")),
shape=p.get("shape", p.get("dims")),
unit=p.get("unit"),
description=p.get("description"),
Expand All @@ -180,6 +189,7 @@ def instance_from_dict(d, id=None, single=None, check_storages=True):
dlite.Property(
name=k,
type=v["type"],
ref=v.get("$ref", v.get("ref")),
shape=v.get("shape", v.get("dims")),
unit=v.get("unit"),
description=v.get("description"),
Expand Down
77 changes: 0 additions & 77 deletions doc/user_guide/concepts.md
Original file line number Diff line number Diff line change
Expand Up @@ -269,83 +269,6 @@ Relations are currently not explored in metadata, but are included because of
their generality.
However, relations are heavily used in [collections].


### Representing an entity
Let's start by making a "Person" entity, where we want to describe his/her name, age and skills.

```json
{
"uri": "http://onto-ns.com/meta/0.1/Person",
"meta": "http://onto-ns.com/meta/0.3/EntitySchema",
"description": "A person.",
"dimensions": [
{
"name": "N",
"description": "Number of skills."
}
],
"properties": [
{
"name": "name",
"type": "string",
"description": "Full name."
},
{
"name": "age",
"type": "float",
"unit": "years",
"description": "Age of person."
},
{
"name": "skills",
"type": "string",
"shape": ["N"],
"description": "List of skills."
}
]
}
```

First we have "uri" identifying the entity, "meta" telling that this is an instance of the entity schema (hence an entity) and a human description.
Then comes "dimensions".
In this case one dimension named "N", which is the number of skills the person has.
Finally we have the properties; "name", "age" and "skills".
We see that "name" is represented as a string, "age" as a floating point number with unit years and "skills" as an array of strings, one for each skill.


### SOFT7 representation
Based on input from [SOFT7], DLite also supports a slightly shortened representation of entities.
The "Person" entity from the above example will in this representation, look like:

```json
{
"uri": "http://onto-ns.com/meta/0.1/Person",
"description": "A person.",
"dimensions": {
"N": "Number of skills."
},
"properties": {
"name": {
"type": "string",
"description": "Full name."
},
"age": {
"type": "float",
"unit": "years",
"description": "Age of person."
},
"skills": {
"type": "string",
"shape": ["N"],
"description": "List of skills."
}
}
}
```

In this representation, the `meta` field defaults to the entity schema if it is left out.
Dimensions and Properties are dictionaries (JSON objects) instead of arrays with the dimension or property name as key.

references
----------

Expand Down
73 changes: 73 additions & 0 deletions doc/user_guide/datamodels.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
Representing a datamodel (entity)
----------------------------------

The underlying structure of DLite datamodels is described under [concepts].

Here, a set of rules on how to create a datamodel is presented.

Note that several other possibilities are available, and this can be seen in the
examples and tests present in the repository.

We choose here to present only one method as mixing representation methods might
be confusing. Note, however, that yaml and json representations are interchangeable.

A generic example with some comments for clarity can be seen below.

```yaml
uri: http://namespace/version/name
description: A description of what this datamodel represents.
dimensions: # Named dimensions referred to in the property shapes. Simplest to represent it as a dict, set to {} if there are no dimensions
name_of_dimension: description of dimension
properties:
name_of_property1:
description: What is this property
type: ref # Can be any of string, float, double, int, ref ....
unit: unit # Can be omitted if the property has no unit
shape: [name_of_dimension] # Can be omitted if the property is a scalar
$ref: http://namespace/version/name_of_referenceddatamodel # only if type is ref
```
The keywords in the datamodel have the following meaning:
* `uri`: A URI that uniquely identifies the datamodel.
* `description`: A human description that describes what this datamodel represents.
* `dimensions`: Dimensions of the properties (referred to by the property shape). Properties can have the same dimensions, but not necessarily. Each dimension is described by:
- name of the dimension
- a human description of the dimension
In the below example there is one dimension with name "N" and description "Number of skills."
* `properties`: Sub-parts of the datamodel that describe the individual data fields. A property has a name and is further specified by the following keywords:
- `description`: Human description of the property.
- `type`: Data type of the property. Ex: "blob5", "boolean", "uint", "int32", "string", "string10", "ref", ...
- `$ref`: Optional. URI of a sub-datamodel. Only used if type is "ref".
- `unit`: Optional. The unit. Ex: "kg", "km/h", ... Can be omitted if the property has no unit.
- `shape`: Optional. Describes the dimensionality of the property as a list of dimension names. Ex: `[N]`. Can be omitted if the property has no shape, i.e. the instance always has only one value. This is equivalent to a 0-dimensional array, i.e. shape=[].
The datamodel below has three properties; "name", "age" and "skills". We see that "name" is represented as a string, "age" as a floating point number with unit years and "skills" as an array of strings, one for each skill.


A slightly more realistic example is the "Person" entity, where we want to describe his/her name, age and skills:

```yaml
uri: http://onto-ns.com/meta/0.1/Person
description: A person.
dimensions:
N: Number of skills.
properties:
name:
description: Full name.
type: string
age:
description: Age of person.
type: float
unit: years
skills:
description: List of skills.
type: string
shape: [N]
```


dlite-validate
==============
The [dlite-validate tool](./tools.md#dlite-validate) can be used to check if a specific representation (in a file) is a valid DLite datamodel.


[concepts]: https://sintef.github.io/dlite/user_guide/concepts.html
1 change: 1 addition & 0 deletions doc/user_guide/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ User Guide
:caption: Contents

concepts
datamodels
type-system
exceptions
collections
Expand Down
34 changes: 32 additions & 2 deletions doc/user_guide/storage_plugins.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Storage plugins
===============
Storage plugins / Drivers
=========================

Content
-------
Expand Down Expand Up @@ -28,6 +28,36 @@ It also comes with a specific `Blob` and `Image` storage plugin, that can load a
Storage plugins can be written in either C or Python.


How to make storage plugins available
-------------------------------------

As described below, it is possible (and most often advisable) to create specific drivers (storage plugins) for your data.
Additional storage plugins (drivers) can be made available by setting the environment variables
`DLITE_STORAGE_PLUGIN_DIRS` or `DLITE_PYTHON_STORAGE_PLUGIN_DIRS` e.g.:
```bash
export DLITE_STORAGE_PLUGIN_DIRS=/path/to/new/folder:$DLITE_STORAGE_PLUGIN_DIRS
```

Within python, the path to the directory containing plugins can be added as follows:

```python
import dlite
dlite.python_storage_plugin_path.append("/path/to/plugins/dir")
```

Often, drivers are connected to very specific datamodels (entities).
DLite will find these datamodels if the path to their directory is set with the
environment variable `DLITE_STORAGES` or added within python with `dlite.storage_path.append`, similarly to what is described above for drivers.


```{attention}
Often, during development dlite will fail unexpectedly. This is typically either because of an error in the
datamodel or the driver.
The variable DLITE_PYDEBUG can be set as `export DLITE_PYDEBUG=` to get python debugging information.
This will give information about the driver.
It is advisable to first check that the datamodel is valid with the command `dlite-validate datamodelfilename`.
```

Using storages implicitly from Python
-------------------------------------
For convenience DLite also has an interface for creating storages implicitly.
Expand Down
12 changes: 12 additions & 0 deletions doc/user_guide/tools.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,18 @@ Tools
DLite comes with a small set of tools.


dlite-validate
--------------
The dlite-validate tool can be used to check if a specific representation (in a file) is a valid DLite datamodel.

This can be run as follows:
```bash
dlite-validate filename.yaml # or json
```

It will then return a list of errors if it is not a valid datamodel.


dlite-getuuid
-------------
This is a handy small tool for generating a random UUID or getting the UUID corresponding to a URI.
Expand Down
1 change: 0 additions & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,6 @@ def run(self):
"Operating System :: POSIX :: Linux",
"Operating System :: Microsoft :: Windows",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
Expand Down
2 changes: 1 addition & 1 deletion requirements_full.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ pyarrow>=14.0,<18.0
tables>=3.8,<5.0
openpyxl>=3.0.9,<3.2
jinja2>=3.0,<4
paramiko>=3.0.0,<3.4.1
paramiko>=3.0.0,<3.5.1
requests>=2.10,<3
redis>=5.0,<6
minio>=6.0,<8
Expand Down
3 changes: 1 addition & 2 deletions storages/python/python-storage-plugins/postgresql.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,7 @@ def load(self, uuid):

# The uuid will be wrong for data instances, so override it
if not inst.is_metameta:
d = inst.asdict()
d["uuid"] = uuid
d = inst.asdict(single=True, uuid=True)
inst = instance_from_dict(d)
return inst

Expand Down
2 changes: 1 addition & 1 deletion storages/python/tests-c/test_postgresql_storage.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ MU_TEST(test_save)
double age = 42.;
const char *skills[] = {"jumping", "hopping"};
int n, i;
char *paths = STRINGIFY(dlite_SOURCE_DIR) "/storage/python/tests/*.json";
char *paths = STRINGIFY(dlite_SOURCE_DIR) "/storage/python/tests-c/*.json";
mu_check(dlite_storage_plugin_path_append(paths) >= 0);
mu_check((meta = dlite_instance_load_url("json://Person.json?mode=r")));
Expand Down
4 changes: 4 additions & 0 deletions storages/python/tests-c/test_postgresql_storage2.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ MU_TEST(test_load_meta)
{
DLiteInstance *meta;
char url[256], *id="http://onto-ns.com/meta/0.1/Person";
char *paths = STRINGIFY(dlite_SOURCE_DIR) "/storage/python/tests-c/*.json";
mu_check(dlite_storage_plugin_path_append(paths) >= 0);
snprintf(url, sizeof(url), "postgresql://%s?%s#%s", HOST, options, id);
mu_check((meta = dlite_instance_load_url(url)));
Expand Down

0 comments on commit e325b2b

Please sign in to comment.