diff --git a/bindings/python/tests/input/test_ref_type_middle.yaml b/bindings/python/tests/input/test_ref_type_middle.yaml new file mode 100644 index 000000000..beaa596c5 --- /dev/null +++ b/bindings/python/tests/input/test_ref_type_middle.yaml @@ -0,0 +1,11 @@ +http://onto-ns.com/meta/0.2/Middle: + description: Middle-level nested data structure. + dimensions: [] + properties: + - name: name + type: string + description: Value of this structure. + - name: leaf + type: ref + $ref: http://onto-ns.com/meta/0.1/Leaf + description: Reference to low-level structure. diff --git a/bindings/python/tests/test_ref_type.py b/bindings/python/tests/test_ref_type.py index 5d7e62c5b..763198bf3 100644 --- a/bindings/python/tests/test_ref_type.py +++ b/bindings/python/tests/test_ref_type.py @@ -1,15 +1,24 @@ from pathlib import Path import dlite +from dlite.testutils import importcheck + +yaml = importcheck("yaml") thisdir = Path(__file__).resolve().parent indir = thisdir / "input" -dlite.storage_path.append(indir / "test_ref_type.json") +dlite.storage_path.append(indir) +dlite.storage_path.append(indir / "test_ref_type_middle.yaml") + +# If yaml is available, we read Middle v0.2, which is defined in +# `test_ref_type_middle.yaml`. Otherwise, we read Middle v0.1, which +# is defined together with the other datamodels in `test_ref_type.json`. 
+version = "0.2" if yaml else "0.1" Top = dlite.get_instance("http://onto-ns.com/meta/0.1/Top") -Middle = dlite.get_instance("http://onto-ns.com/meta/0.1/Middle") +Middle = dlite.get_instance(f"http://onto-ns.com/meta/{version}/Middle") Leaf = dlite.get_instance("http://onto-ns.com/meta/0.1/Leaf") Linked = dlite.get_instance("http://onto-ns.com/meta/0.1/Linked") Tree = dlite.get_instance("http://onto-ns.com/meta/0.1/Tree") @@ -78,6 +87,8 @@ assert cyclic.subtree[0].subtree[0] == cyclic assert cyclic.subtree[0].subtree[0].subtree[0] == cyclic -# Instantiate nested from dict -# For issue #515 -# middle = Middle(properties={"name": "nested", "leaf": {"a": 1, "b": True}}) +# For issue #982: ref-type in yaml +assert Middle.getprop("leaf").ref == "http://onto-ns.com/meta/0.1/Leaf" + +# For issue #515: Instantiate nested from dict +#middle = Middle(properties={"name": "nested", "leaf": {"a": 1, "b": True}}) diff --git a/bindings/python/utils.py b/bindings/python/utils.py index 6f7047f7a..87eebc359 100644 --- a/bindings/python/utils.py +++ b/bindings/python/utils.py @@ -136,8 +136,16 @@ def instance_from_dict(d, id=None, single=None, check_storages=True): if meta.is_metameta: if "uri" in d: uri = d["uri"] - else: + elif "identity" in d: + uri = d["identity"] + elif "name" in d and "version" in d and "namespace" in d: uri = dlite.join_meta_uri(d["name"], d["version"], d["namespace"]) + elif id and dlite.urlparse(id).scheme: + uri = id + else: + raise TypeError( + "`id` required for metadata when the URI is not in the dict" + ) if check_storages: try: @@ -169,6 +177,7 @@ def instance_from_dict(d, id=None, single=None, check_storages=True): dlite.Property( name=p["name"], type=p["type"], + ref=p.get("$ref", p.get("ref")), shape=p.get("shape", p.get("dims")), unit=p.get("unit"), description=p.get("description"), @@ -180,6 +189,7 @@ def instance_from_dict(d, id=None, single=None, check_storages=True): dlite.Property( name=k, type=v["type"], + ref=v.get("$ref", v.get("ref")), 
shape=v.get("shape", v.get("dims")), unit=v.get("unit"), description=v.get("description"), diff --git a/doc/user_guide/concepts.md b/doc/user_guide/concepts.md index 8d77f5185..944e02494 100644 --- a/doc/user_guide/concepts.md +++ b/doc/user_guide/concepts.md @@ -269,83 +269,6 @@ Relations are currently not explored in metadata, but are included because of their generality. However, relations are heavily used in [collections]. - -### Representing an entity -Lets start to make a "Person" entity, where we want to describe his/her name, age and skills. - -```json -{ - "uri": "http://onto-ns.com/meta/0.1/Person", - "meta": "http://onto-ns.com/meta/0.3/EntitySchema", - "description": "A person.", - "dimensions": [ - { - "name": "N", - "description": "Number of skills." - } - ], - "properties": [ - { - "name": "name", - "type": "string", - "description": "Full name." - }, - { - "name": "age", - "type": "float", - "unit": "years", - "description": "Age of person." - }, - { - "name": "skills", - "type": "string", - "shape": ["N"], - "description": "List of skills." - } - ] -} -``` - -First we have "uri" identifying the entity, "meta" telling that this is an instance of the entity schema (hence an entity) and a human description. -Then comes "dimensions". -In this case one dimension named "N", which is the number of skills the person has. -Finally we have the properties; "name", "age" and "skills". -We see that "name" is represented as a string, "age" as a floating point number with unit years and "skills" as an array of strings, one for each skill. - - -### SOFT7 representation -Based on input from [SOFT7], DLite also supports a slightly shortened representation of entities. -The "Person" entity from the above example will in this representation, look like: - -```json -{ - "uri": "http://onto-ns.com/meta/0.1/Person", - "description": "A person.", - "dimensions": { - "N": "Number of skills." 
- }, - "properties": { - "name": { - "type": "string", - "description": "Full name." - }, - "age": { - "type": "float", - "unit": "years", - "description": "Age of person." - }, - "skills": { - "type": "string", - "shape": ["N"], - "description": "List of skills." - } - } -} -``` - -In this representation defaults the `meta` field to the entity schema if it is left out. -Dimensions and Properties are dictionaries (JSON objects) instead of arrays with the dimension or property name as key. - references ---------- diff --git a/doc/user_guide/datamodels.md b/doc/user_guide/datamodels.md new file mode 100644 index 000000000..76c2c455e --- /dev/null +++ b/doc/user_guide/datamodels.md @@ -0,0 +1,73 @@ +Representing a datamodel (entity) +---------------------------------- + +The underlying structure of DLite datamodels is described under [concepts]. + +Here, a set of rules on how to create a datamodel is presented. + +Note that several other possibilities are available, and this can be seen in the +examples and tests present in the repository. + +We choose here to present only one method as mixing representation methods might +be confusing. Note, however, that yaml and json representations are interchangeable. + +A generic example with some comments for clarity can be seen below. + +```yaml +uri: http://namespace/version/name +description: A description of what this datamodel represents. +dimensions: # Named dimensions referred to in the property shapes. Simplest to represent it as a dict, set to {} if there are no dimensions + name_of_dimension: description of dimension +properties: + name_of_property1: + description: What is this property + type: ref # Can be any of string, float, double, int, ref .... 
+ unit: unit # Can be omitted if the property has no unit + shape: [name_of_dimension] # Can be omitted if the property is a scalar + $ref: http://namespace/version/name_of_referenceddatamodel # only if type is ref +``` + +The keywords in the datamodel have the following meaning: +* `uri`: A URI that uniquely identifies the datamodel. +* `description`: A human-readable description of what this datamodel represents. +* `dimensions`: Dimensions of the properties (referred to by the property shape). Properties can have the same dimensions, but not necessarily. Each dimension is described by: + - name of the dimension + - a human description of the dimension + In the example below there is one dimension with name "N" and description "Number of skills." +* `properties`: Sub-parts of the datamodel that describe the individual data fields. A property has a name and is further specified by the following keywords: + - `description`: Human description of the property. + - `type`: Data type of the property. Ex: "blob5", "boolean", "uint", "int32", "string", "string10", "ref", ... + - `$ref`: Optional. URI of a sub-datamodel. Only used if type is "ref". + - `unit`: Optional. The unit. Ex: "kg", "km/h", ... Can be omitted if the property has no unit. + - `shape`: Optional. Describes the dimensionality of the property as a list of dimension names. Ex: `[N]`. Can be omitted if the property has no shape, i.e. the instance always has only one value. This is equivalent to a 0-dimensional array, i.e. shape=[]. + The datamodel below has three properties: "name", "age" and "skills". We see that "name" is represented as a string, "age" as a floating point number with unit years and "skills" as an array of strings, one for each skill. + + +A slightly more realistic example is the "Person" entity, where we want to describe his/her name, age and skills: + +```yaml +uri: http://onto-ns.com/meta/0.1/Person +description: A person. +dimensions: + N: Number of skills. 
+properties: + name: + description: Full name. + type: string + age: + description: Age of person. + type: float + unit: years + skills: + description: List of skills. + type: string + shape: [N] +``` + + +dlite-validate +============== +The [dlite-validate tool](./tools.md#dlite-validate) can be used to check if a specific representation (in a file) is a valid DLite datamodel. + + +[concepts]: https://sintef.github.io/dlite/user_guide/concepts.html diff --git a/doc/user_guide/index.rst b/doc/user_guide/index.rst index c6ef982c6..35a4fdef2 100644 --- a/doc/user_guide/index.rst +++ b/doc/user_guide/index.rst @@ -6,6 +6,7 @@ User Guide :caption: Contents concepts + datamodels type-system exceptions collections diff --git a/doc/user_guide/storage_plugins.md b/doc/user_guide/storage_plugins.md index 1734fadf1..c45d147f0 100644 --- a/doc/user_guide/storage_plugins.md +++ b/doc/user_guide/storage_plugins.md @@ -1,5 +1,5 @@ -Storage plugins -=============== +Storage plugins / Drivers +========================= Content ------- @@ -28,6 +28,36 @@ It also comes with a specific `Blob` and `Image` storage plugin, that can load a Storage plugins can be written in either C or Python. +How to make storage plugins available +------------------------------------- + +As described below, it is possible (and most often advisable) to create specific drivers (storage plugins) for your data. +Additional storage plugin drivers can be made available by setting the environment variables +`DLITE_STORAGE_PLUGIN_DIRS` or `DLITE_PYTHON_STORAGE_PLUGIN_DIRS` e.g.: +```bash +export DLITE_STORAGE_PLUGIN_DIRS=/path/to/new/folder:$DLITE_STORAGE_PLUGIN_DIRS +``` + +Within python, the path to the directory containing plugins can be added as follows: + +```python +import dlite +dlite.python_storage_plugin_path.append("/path/to/plugins/dir") +``` + +Often drivers are connected to very specific datamodels (entities). 
+DLite will find these datamodels if the path to their directory is set with the +environment variable `DLITE_STORAGES` or added within python with `dlite.storage_path.append` similarly to what is described above for drivers. + + +```{attention} +Often, during development, dlite will fail unexpectedly. This is typically either because of an error in the +datamodel or the driver. +The variable DLITE_PYDEBUG can be set as `export DLITE_PYDEBUG=` to get python debugging information. +This will give information about the driver. +It is advisable to first check that the datamodel is valid with the command `dlite-validate datamodelfilename`. +``` + Using storages implicitly from Python ------------------------------------- For convenience DLite also has an interface for creating storages implicitly. diff --git a/doc/user_guide/tools.md b/doc/user_guide/tools.md index c0ad039d2..708c8a257 100644 --- a/doc/user_guide/tools.md +++ b/doc/user_guide/tools.md @@ -3,6 +3,18 @@ Tools DLite comes with a small set of tools. +dlite-validate +-------------- +The dlite-validate tool can be used to check if a specific representation (in a file) is a valid DLite datamodel. + +This can be run as follows: +```bash +dlite-validate filename.yaml # or json +``` + +It will then return a list of errors if it is not a valid datamodel. + + dlite-getuuid ------------- This is a handy small tool for generating a random UUID or getting the UUID corresonding to an URI. 
diff --git a/python/setup.py b/python/setup.py index 00a3f6cbd..91e04c267 100644 --- a/python/setup.py +++ b/python/setup.py @@ -202,7 +202,6 @@ def run(self): "Operating System :: POSIX :: Linux", "Operating System :: Microsoft :: Windows", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", diff --git a/requirements_full.txt b/requirements_full.txt index 7899f2803..a448265d6 100644 --- a/requirements_full.txt +++ b/requirements_full.txt @@ -13,7 +13,7 @@ pyarrow>=14.0,<18.0 tables>=3.8,<5.0 openpyxl>=3.0.9,<3.2 jinja2>=3.0,<4 -paramiko>=3.0.0,<3.4.1 +paramiko>=3.0.0,<3.5.1 requests>=2.10,<3 redis>=5.0,<6 minio>=6.0,<8 diff --git a/storages/python/python-storage-plugins/postgresql.py b/storages/python/python-storage-plugins/postgresql.py index 74aa2a8f4..1053d7d87 100644 --- a/storages/python/python-storage-plugins/postgresql.py +++ b/storages/python/python-storage-plugins/postgresql.py @@ -129,8 +129,7 @@ def load(self, uuid): # The uuid will be wrong for data instances, so override it if not inst.is_metameta: - d = inst.asdict() - d["uuid"] = uuid + d = inst.asdict(single=True, uuid=True) inst = instance_from_dict(d) return inst diff --git a/storages/python/tests-c/test_postgresql_storage.c b/storages/python/tests-c/test_postgresql_storage.c index 99172f5c1..13bff2571 100644 --- a/storages/python/tests-c/test_postgresql_storage.c +++ b/storages/python/tests-c/test_postgresql_storage.c @@ -44,7 +44,7 @@ MU_TEST(test_save) double age = 42.; const char *skills[] = {"jumping", "hopping"}; int n, i; - char *paths = STRINGIFY(dlite_SOURCE_DIR) "/storage/python/tests/*.json"; + char *paths = STRINGIFY(dlite_SOURCE_DIR) "/storage/python/tests-c/*.json"; mu_check(dlite_storage_plugin_path_append(paths) >= 0); mu_check((meta = dlite_instance_load_url("json://Person.json?mode=r"))); diff --git 
a/storages/python/tests-c/test_postgresql_storage2.c b/storages/python/tests-c/test_postgresql_storage2.c index 1ce1044a1..578435d0f 100644 --- a/storages/python/tests-c/test_postgresql_storage2.c +++ b/storages/python/tests-c/test_postgresql_storage2.c @@ -32,6 +32,10 @@ MU_TEST(test_load_meta) { DLiteInstance *meta; char url[256], *id="http://onto-ns.com/meta/0.1/Person"; + char *paths = STRINGIFY(dlite_SOURCE_DIR) "/storage/python/tests-c/*.json"; + + mu_check(dlite_storage_plugin_path_append(paths) >= 0); + snprintf(url, sizeof(url), "postgresql://%s?%s#%s", HOST, options, id); mu_check((meta = dlite_instance_load_url(url)));