From 34712726bb9f26a00ebdeb8a9306df0751265a38 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 3 Nov 2022 10:28:56 +0100 Subject: [PATCH 01/13] Add a per-variable processing_steps to the schema --- dataset-defs/template.schema.json | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/dataset-defs/template.schema.json b/dataset-defs/template.schema.json index 125f9fd..66f2a04 100644 --- a/dataset-defs/template.schema.json +++ b/dataset-defs/template.schema.json @@ -151,7 +151,19 @@ ["time", "lat", "lon"] ] }, - "sources": { + "processing_steps": { + "type": "array", + "description": "processing steps to be applied to the source", + "items": {"type": "string"}, + "examples": [ + [ + "Masking out low quality pixels", + "Temporal NaN-mean aggregation", + "Spatial bi-cubic interpolation" + ] + ] + }, + "sources": { "type": "array", "minItems": 1, "items": { From 0f7dbc955c5d6d2c0c275de9cfebcd6d4d541cd8 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 15 Feb 2023 11:53:31 +0100 Subject: [PATCH 02/13] Update template to match current specification --- dataset-spec/template.geojson | 71 ++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 14 deletions(-) diff --git a/dataset-spec/template.geojson b/dataset-spec/template.geojson index 9c1aa66..5f7d1dc 100644 --- a/dataset-spec/template.geojson +++ b/dataset-spec/template.geojson @@ -1,29 +1,24 @@ { "type": "Feature", "properties": { - "title": "Ocean Color Products in the Mediterranean Sea", "data_id": "OC-CMEMS-MED-005-2005-2022-1d-v1", + "title": "Ocean Color Products in the Mediterranean Sea", "description": "CMEMS Ocean Colour for Mediterranean Sea, 2005-2022", - "time_range": ["2005-05-01", "2022-05-01"], - "time_period": "1D", - "spatial_ref": "CRS84", - "spatial_bbox": [-6.75, 30, 36.5, 46], - "spatial_res": 0.05, - "metadata": { - "published_on": "2022-10-26", - "keywords": ["mediterranean", "ocean colour", "chlorophyll"] - }, + "version": "1.0.3", + 
"recipe": "https://example.com/recipe/", + "recipe": "https://example.com/recipe/CHANGES.md", + "license": "Creative Commons Attribution 4.0 International (CC BY 4.0)", "variables": [ { "name": "CHL", - "units": "mg/m^3", "long_name": "Chlorophyll Concentration", "dtype": "float32", + "dims": ["time", "lat", "lon"], + "units": "mg/m^3", + "fill_value": -1.0, "valid_min": 0.0, "valid_max": 1000.0, - "color_bar_name": "YlGn", "time_range": ["2005-05-01", "2022-05-01"], - "dims": ["time", "lat", "lon"], "sources": [ { "home_url": "https://resources.marine.copernicus.eu/", @@ -39,11 +34,59 @@ } ], "metadata": { + "color_bar_name": "YlGn", + "color_value_min": 0.5, + "color_value_max": 150, "keywords": ["colour", "marine", "chlorophyll"], "processing-level": "L4" } } - ] + ], + "sources": [ + { + "name": "Sentinel Hub S2L2A", + "variable_names": ["CHL"], + "store_id": "sentinelhub", + "store_params": { + "num_retries": 80 + }, + "variable_open_params": { + "CHL": ["some_parameter"] + } + } + ], + "metadata": { + "Conventions": "CF-1.9", + "acknowledgment": "EO4SIBS, CMEMS, DeepESDL project", + "contributor_name": "Brockmann Geomatics Sweden AB", + "contributor_url": "www.brockmann-geomatics.se", + "creator_email": "info@brockmann-consult.de", + "creator_name": "Brockmann Consult GmbH", + "creator_url": "www.brockmann-consult.de", + "institution": "Brockmann Consult GmbH", + "project": "DeepESDL", + "publisher_email": "info@brockmann-consult.de", + "publisher_name": "Brockmann Consult GmbH", + "license_url": "https://creativecommons.org/licenses/by/4.0/", + "source": "EO4SIBS, CMEMS", + "date_modified": "2022-08-19 16:19:15.359970", + "geospatial_lon_min": -6.75, + "geospatial_lon_max": 36.5, + "geospatial_lat_min": 30.0, + "geospatial_lat_max": 46.0, + "geospatial_lon_resolution": 0.05, + "geospatial_lat_resolution": 0.05, + "temporal_coverage_start": "2005-05-01 00:00:00", + "temporal_coverage_end": "2022-05-01 23:59:59", + "temporal_resolution": "1D", + }, + 
"time_range": ["2005-05-01", "2022-05-01"], + "time_period": "1D", + "spatial_ref": "CRS84", + "spatial_bbox": [-6.75, 30, 36.5, 46], + "spatial_size": [865, 320], + "spatial_offset": [0, 0], + "spatial_res": [0.05, 0.05], }, "geometry": { "type": "Polygon", From 1e9d60ceda2e562a55c22167a50cd3d9846ebdea Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 15 Feb 2023 15:31:44 +0100 Subject: [PATCH 03/13] Fix some typos in the specification template --- dataset-spec/template.geojson | 6 +- dataset-spec/template.schema.json | 139 ++++++++++++++++++++++++++---- 2 files changed, 125 insertions(+), 20 deletions(-) diff --git a/dataset-spec/template.geojson b/dataset-spec/template.geojson index 5f7d1dc..dbd3e0b 100644 --- a/dataset-spec/template.geojson +++ b/dataset-spec/template.geojson @@ -6,7 +6,7 @@ "description": "CMEMS Ocean Colour for Mediterranean Sea, 2005-2022", "version": "1.0.3", "recipe": "https://example.com/recipe/", - "recipe": "https://example.com/recipe/CHANGES.md", + "changes": "https://example.com/recipe/CHANGES.md", "license": "Creative Commons Attribution 4.0 International (CC BY 4.0)", "variables": [ { @@ -78,7 +78,7 @@ "geospatial_lat_resolution": 0.05, "temporal_coverage_start": "2005-05-01 00:00:00", "temporal_coverage_end": "2022-05-01 23:59:59", - "temporal_resolution": "1D", + "temporal_resolution": "1D" }, "time_range": ["2005-05-01", "2022-05-01"], "time_period": "1D", @@ -86,7 +86,7 @@ "spatial_bbox": [-6.75, 30, 36.5, 46], "spatial_size": [865, 320], "spatial_offset": [0, 0], - "spatial_res": [0.05, 0.05], + "spatial_res": [0.05, 0.05] }, "geometry": { "type": "Polygon", diff --git a/dataset-spec/template.schema.json b/dataset-spec/template.schema.json index 66f2a04..d33969e 100644 --- a/dataset-spec/template.schema.json +++ b/dataset-spec/template.schema.json @@ -15,39 +15,68 @@ "properties": { "type": "object", "required": [ - "title", "data_id", + "title", "description", - "time_range", - "time_period", - "spatial_ref", - 
"spatial_bbox", - "spatial_res", - "variables" + "version", + "license", + "variables", + "sources" ], "additionalProperties": false, "properties": { - "title": { + "data_id": { "type": "string", - "description": "the title of the dataset", + "description": "unique identification string, filename base", "examples": [ - "Ocean Products in the Mediterranean Sea" + "OC-CMEMS-MED-005-2005-2022-1d-v1" ] }, - "data_id": { + "title": { "type": "string", - "description": "a unique dataset identifier", + "description": "human-readable title of the dataset", "examples": [ - "OC-CMEMS-MED-005-2005-2022-1d-v1.zarr" + "Ocean Products in the Mediterranean Sea" ] }, "description": { "type": "string", - "description": "a description of the dataset", + "description": "human-readable description of the dataset", "examples": [ "CMEMS Ocean Colour for Mediterranean Sea, 2005-2022" ] }, + "version": { + "type": "string", + "description": "Version string. Semver format recommended.", + "examples": [ + "1.0.3" + ] + }, + "recipe": { + "type": "string", + "description": "URL to repo recipe directory", + "examples": [ + "https://example.com/recipe/" + ] + }, + "changes": { + "type": "string", + "description": "URL to CHANGES.md in recipe directory", + "examples": [ + "https://example.com/recipe/CHANGES.md" + ] + }, + "license": { + "type": "string", + "description": "name of the license under which the dataset is published", + "examples": [ + "Creative Commons Attribution 4.0 International (CC BY 4.0)" + ] + }, + "metadata": { + "type": "object" + }, "time_range": { "type": "array", "prefixItems": [ @@ -61,7 +90,7 @@ ] }, "time_period": { - "type": "string", + "type": ["string", "null"], "examples": ["1D", "2D", "8D", "10D", "2W", "1M"] }, "spatial_ref": { @@ -103,8 +132,35 @@ "description": "spatial resolution in units of the spatial coordinate reference system", "examples": [0.05, 10, 300] }, - "metadata": { - "type": "object" + "spatial_size": { + "type": ["array"], + "prefixItems": [ + { + "type": "integer", 
"exclusiveMinimum": 0 + }, + { + "type": "integer", + "exclusiveMinimum": 0 + } + ], + "items": false, + "description": "the spatial image size as [width, height] in pixels", + "examples": [[6000, 3000], [256, 256]] + }, + "spatial_offset": { + "type": ["array"], + "prefixItems": [ + { + "type": "number" + }, + { + "type": "number" + } + ], + "items": false, + "description": "Offset coordinates in units of the CRS", + "examples": [[-180, -90], [0, 0]] }, "variables": { "type": "array", @@ -239,6 +295,55 @@ } } } + }, + "sources": { + "type": "array", + "description": "Array of data source definitions", + "items": { + "type": "object", + "additionalProperties": false, + "required": [ + "name", + "variable_names", + "store_id" + ], + "properties": { + "name": { + "type": "string" + }, + "variable_names": { + "type": "array" + }, + "store_id": { + "type": "string" + }, + "store_params": { + "type": "object" + }, + "variable_open_params": { + "type": "number" + }, + "variables": {}, + "download_url": {}, + "protocol": { + "type": "string", + "examples": ["http", "ftp"] + }, + "compressed": { + "type": "boolean" + }, + "compression_format": { + "type": "string" + }, + "source_format": { + "type": "string", + "examples": ["netcdf", "hdf", "geotiff"] + }, + "source_crs": { + "type": "string" + } + } + } } } }, From 4a9838f13d5220c18b866c87ea73210c9fa3d21e Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 15 Feb 2023 16:24:37 +0100 Subject: [PATCH 04/13] Update specification schema to match current spec --- dataset-spec/template.schema.json | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/dataset-spec/template.schema.json b/dataset-spec/template.schema.json index d33969e..9e393e4 100644 --- a/dataset-spec/template.schema.json +++ b/dataset-spec/template.schema.json @@ -183,7 +183,17 @@ "properties": { "name": {"type": "string"}, "long_name": {"type": "string"}, + "dtype": {"type": "string"}, + "dims": { + "type": "array", + 
"items": {"type": "string"}, + "examples": [ + ["time", "y", "x"], + ["time", "lat", "lon"] + ] + }, "units": {"type": "string"}, + "fill_value": {"type": "number"}, "valid_min": {"type": "number"}, "valid_max": {"type": "number"}, "time_range": { @@ -198,15 +208,6 @@ ], "items": false }, - "dtype": {"type": "string"}, - "dims": { - "type": "array", - "items": {"type": "string"}, - "examples": [ - ["time", "y", "x"], - ["time", "lat", "lon"] - ] - }, "processing_steps": { "type": "array", "description": "processing steps to be applied to the source", @@ -321,7 +322,7 @@ "type": "object" }, "variable_open_params": { - "type": "number" + "type": "object" }, "variables": {}, "download_url": {}, From 5c4b00257bee8ddb570ca59b38b78cfafe11c1e1 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 15 Feb 2023 16:30:41 +0100 Subject: [PATCH 05/13] Remove per-variable "sources" list --- dataset-spec/template.geojson | 14 ------- dataset-spec/template.schema.json | 69 ++++--------------------------- 2 files changed, 7 insertions(+), 76 deletions(-) diff --git a/dataset-spec/template.geojson b/dataset-spec/template.geojson index dbd3e0b..bb1158f 100644 --- a/dataset-spec/template.geojson +++ b/dataset-spec/template.geojson @@ -19,20 +19,6 @@ "valid_min": 0.0, "valid_max": 1000.0, "time_range": ["2005-05-01", "2022-05-01"], - "sources": [ - { - "home_url": "https://resources.marine.copernicus.eu/", - "data_url": "https://resources.marine.copernicus.eu/", - "license_url": "https://marine.copernicus.eu/user-corner/service-commitments-and-licence", - "attributions": [], - "processing_steps": [ - "Masking out low quality pixels", - "Temporal NaN-mean aggregation", - "Spatial bi-cubic interpolation" - ], - "remarks": "" - } - ], "metadata": { "color_bar_name": "YlGn", "color_value_min": 0.5, diff --git a/dataset-spec/template.schema.json b/dataset-spec/template.schema.json index 9e393e4..c5121a1 100644 --- a/dataset-spec/template.schema.json +++ 
b/dataset-spec/template.schema.json @@ -177,8 +177,7 @@ "valid_max", "time_range", "dtype", - "dims", - "sources" + "dims" ], "properties": { "name": {"type": "string"}, @@ -220,66 +219,12 @@ ] ] }, - "sources": { - "type": "array", - "minItems": 1, - "items": { - "type": "object", - "additionalProperties": true, - "required": [ - "data_url", - "license_url" - ], - "properties": { - "home_url": { - "type": "string", - "format": "uri", - "examples": [ - ["https://resources.marine.copernicus.eu/"] - ] - }, - "data_url": { - "type": "string", - "format": "uri", - "examples": [ - ["https://resources.marine.copernicus.eu/"] - ] - }, - "license_url": { - "type": "string", - "format": "uri", - "examples": [ - ["https://marine.copernicus.eu/user-corner/service-commitments-and-licence"] - ] - }, - "attributions": { - "type": "array", - "items": {"type": "string"} - }, - "processing_steps": { - "type": "array", - "description": "processing steps to be applied to the source", - "items": {"type": "string"}, - "examples": [ - [ - "Masking out low quality pixels", - "Temporal NaN-mean aggregation", - "Spatial bi-cubic interpolation" - ] - ] - }, - "remarks": { - "type": "string" - } - } - } - }, - "color_bar_name": { - "type": "string", - "description": "name of a matplotlib colormap: https://matplotlib.org/stable/tutorials/colors/colormaps.html", - "examples": [ - ["viridis_r", "turbo", "prism", "Spectral"] - ] + "color_bar_name": { + "type": "string", + "description": "name of a matplotlib colormap: https://matplotlib.org/stable/tutorials/colors/colormaps.html", + "examples": [ + ["viridis_r", "turbo", "prism", "Spectral"] + ] }, "color_value_min": { "type": "number", From c36dd8e907354e0e4a4edef234c1314b7079cd1a Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 15 Feb 2023 16:50:34 +0100 Subject: [PATCH 06/13] Schema and template: move colour props to metadata --- dataset-spec/template.schema.json | 67 ++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 
24 deletions(-) diff --git a/dataset-spec/template.schema.json b/dataset-spec/template.schema.json index c5121a1..74193a7 100644 --- a/dataset-spec/template.schema.json +++ b/dataset-spec/template.schema.json @@ -172,12 +172,9 @@ "required": [ "name", "long_name", - "units", - "valid_min", - "valid_max", - "time_range", "dtype", - "dims" + "dims", + "units" ], "properties": { "name": {"type": "string"}, @@ -219,25 +216,47 @@ ] ] }, - "color_bar_name": { - "type": "string", - "description": "name of a matplotlib colormap: https://matplotlib.org/stable/tutorials/colors/colormaps.html", - "examples": [ - ["viridis_r", "turbo", "prism", "Spectral"] - ] - }, - "color_value_min": { - "type": "number", - "description": "minimum value for colour scale", - "examples": [-10500, 0, 1.735e-9, 4.5e6] - }, - "color_value_max": { - "type": "number", - "description": "maximum value for colour scale", - "examples": [-9500, 0, 2.978e-9, 8.5e6] - }, - "metadata": { - "type": "object" + "metadata": { + "type": "object", + "required": [ + "color_bar_name", + "color_value_min", + "color_value_max" + ], + "properties": { + "color_bar_name": { + "type": "string", + "description": "name of a matplotlib colormap: https://matplotlib.org/stable/tutorials/colors/colormaps.html", + "examples": [ + [ + "viridis_r", + "turbo", + "prism", + "Spectral" + ] + ] + }, + "color_value_min": { + "type": "number", + "description": "minimum value for colour scale", + "examples": [ + -10500, + 0, + 1.735e-9, + 4.5e6 + ] + }, + "color_value_max": { + "type": "number", + "description": "maximum value for colour scale", + "examples": [ + -9500, + 0, + 2.978e-9, + 8.5e6 + ] + } + } } } } From ddba8845c4b616bafadfc04fd5800c327ba1ca82 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 15 Feb 2023 16:51:49 +0100 Subject: [PATCH 07/13] Schema: remove processing_steps from variable --- dataset-spec/template.schema.json | 12 ------------ 1 file changed, 12 deletions(-) diff --git 
a/dataset-spec/template.schema.json b/dataset-spec/template.schema.json index 74193a7..1018476 100644 --- a/dataset-spec/template.schema.json +++ b/dataset-spec/template.schema.json @@ -204,18 +204,6 @@ ], "items": false }, - "processing_steps": { - "type": "array", - "description": "processing steps to be applied to the source", - "items": {"type": "string"}, - "examples": [ - [ - "Masking out low quality pixels", - "Temporal NaN-mean aggregation", - "Spatial bi-cubic interpolation" - ] - ] - }, "metadata": { "type": "object", "required": [ From aab74047d9134fd7e91c934cb94ee6eb8da61521 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 15 Feb 2023 17:17:34 +0100 Subject: [PATCH 08/13] Improve schema for data sources --- dataset-spec/template.schema.json | 47 ++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/dataset-spec/template.schema.json b/dataset-spec/template.schema.json index 1018476..947e684 100644 --- a/dataset-spec/template.schema.json +++ b/dataset-spec/template.schema.json @@ -260,40 +260,67 @@ "variable_names", "store_id" ], + "dependentRequired": { + "variable_names": ["store_id"], + "store_id": ["variable_names"], + "variables": ["download_url"], + "download_url": ["variables"] + }, "properties": { "name": { - "type": "string" + "type": "string", + "description": "human-readable title of this data source", + "examples": ["Sentinel Hub S2L2A"] }, "variable_names": { - "type": "array" + "type": "array", + "description": "array of variable names provided by this data store", + "examples": [["B06", "SCL"]] }, "store_id": { - "type": "string" + "type": "string", + "description": "xcube data store identifier", + "examples": ["sentinelhub"] }, "store_params": { - "type": "object" + "type": "object", + "description": "xcube data store parameters", + "examples": [{"num_retries": 80}] }, "variable_open_params": { - "type": "object" + "type": "object", + "description": "xcube data store open parameters" + }, + 
"variables": { + "type": "object", + "description": "mapping of variable names to variable sources" + }, + "download_url": { + "type": "string", + "description": "download URL template" }, - "variables": {}, - "download_url": {}, "protocol": { "type": "string", + "description": "transport protocol; detected from download_url if omitted", "examples": ["http", "ftp"] }, "compressed": { - "type": "boolean" + "type": "boolean", + "default": false, + "description": "whether data is compressed; detected from data if omitted" }, "compression_format": { - "type": "string" + "type": "string", + "description": "compression format in use; detected from data if omitted" }, "source_format": { "type": "string", + "description": "source format name; detected from filename if omitted", "examples": ["netcdf", "hdf", "geotiff"] }, "source_crs": { - "type": "string" + "type": "string", + "description": "coordinate reference system of source data; detected from data if omitted" } } } From c5607ff094b3e03561a152879301ce959b13ebb3 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 16 Feb 2023 14:45:22 +0100 Subject: [PATCH 09/13] Schema: require minimum spatial_size of 2 --- dataset-spec/template.schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dataset-spec/template.schema.json b/dataset-spec/template.schema.json index 947e684..02ab935 100644 --- a/dataset-spec/template.schema.json +++ b/dataset-spec/template.schema.json @@ -137,11 +137,11 @@ "prefixItems": [ { "type": "integer", - "exclusiveMinimum": 0 + "minimum": 2 }, { "type": "integer", - "exclusiveMinimum": 0 + "minimum": 2 } ], "items": false, From c597dcecfece3763ab8f3fa2bf65210083ca43e0 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 16 Feb 2023 16:04:16 +0100 Subject: [PATCH 10/13] Spec and schema: update some property names --- dataset-spec/README.md | 14 +++++++------- dataset-spec/template.schema.json | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git 
a/dataset-spec/README.md b/dataset-spec/README.md index c9b0558..1eccaac 100644 --- a/dataset-spec/README.md +++ b/dataset-spec/README.md @@ -222,13 +222,13 @@ data files in a described source, usually by some URL or set of URLs. The following properties describe access to a xcube data store: -| Property | Type | Req.? | Description | Example | -|----------------------|--------|:-----:|------------------------------------------------------|------------------------| -| name | string | Y | Human-readable title of this data source. | `"Sentinel Hub S2L2A"` | -| variable_names | string | Y | Array of variable names provided by this data store. | `["B06", "SCL"]` | -| store_id | string | Y | xcube data store identifier. | `"sentinelhub"` | -| store_params | object | N | xcube data store parameters. | `{"num_retries": 80}` | -| variable_open_params | object | N | xcube data store open parameters. | | +| Property | Type | Req.? | Description | Example | +|----------------|--------|:-----:|------------------------------------------------------|------------------------| +| name | string | Y | Human-readable title of this data source. | `"Sentinel Hub S2L2A"` | +| variable_names | string | Y | Array of variable names provided by this data store. | `["B06", "SCL"]` | +| store_id | string | Y | xcube data store identifier. | `"sentinelhub"` | +| store_params | object | N | xcube data store parameters. | `{"num_retries": 80}` | +| data_params | object | N | xcube data store open parameters. | | `variable_open_params` is an object that maps variable names to specific open parameters passed to the given data store. 
diff --git a/dataset-spec/template.schema.json b/dataset-spec/template.schema.json index 02ab935..77fa7b0 100644 --- a/dataset-spec/template.schema.json +++ b/dataset-spec/template.schema.json @@ -284,12 +284,12 @@ }, "store_params": { "type": "object", - "description": "xcube data store parameters", + "description": "xcube parameters used to open the data store", "examples": [{"num_retries": 80}] }, - "variable_open_params": { + "data_params": { "type": "object", - "description": "xcube data store open parameters" + "description": "xcube parameters used to open data from the data store" }, "variables": { "type": "object", From 29cf2dc5f9e2fee289cb53dd23e9d8b33d8736c7 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 16 Feb 2023 16:07:00 +0100 Subject: [PATCH 11/13] Spec and schema: make units optional --- dataset-spec/README.md | 2 +- dataset-spec/template.schema.json | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/dataset-spec/README.md b/dataset-spec/README.md index 1eccaac..53cfda9 100644 --- a/dataset-spec/README.md +++ b/dataset-spec/README.md @@ -162,7 +162,7 @@ data cube: | long_name | string | Y | Array of variable definitions. | `"Chlorophyll Concentration"` | | dtype | string | Y | Numpy-compatible data type name. | `float32"` | | dims | string[] | Y | Array of dimension names. | `["time", "lat", "lon"]"` | -| units | string | null | Y | Physical unit. | `"mg/m^3"`, `"n.a."` | +| units | string | null | N | Physical unit. | `"mg/m^3"`, `"n.a."` | | fill_value | number | null | N | Unscaled values equal to `fill_value` are undefined. | 1.0 | | valid_min | number | N | Values below that number are undefined. | 0.0 | | valid_max | number | N | Values above that number are undefined. 
| 1.0 | diff --git a/dataset-spec/template.schema.json b/dataset-spec/template.schema.json index 77fa7b0..0945fd4 100644 --- a/dataset-spec/template.schema.json +++ b/dataset-spec/template.schema.json @@ -173,8 +173,7 @@ "name", "long_name", "dtype", - "dims", - "units" + "dims" ], "properties": { "name": {"type": "string"}, From 72adf521513a5ce0827433a049c0275b1f5c1a73 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 16 Feb 2023 17:00:04 +0100 Subject: [PATCH 12/13] Schema: match exactly one of the two source types --- dataset-spec/template.geojson | 2 +- dataset-spec/template.schema.json | 170 ++++++++++++++++++------------ 2 files changed, 104 insertions(+), 68 deletions(-) diff --git a/dataset-spec/template.geojson b/dataset-spec/template.geojson index bb1158f..3a52a4f 100644 --- a/dataset-spec/template.geojson +++ b/dataset-spec/template.geojson @@ -36,7 +36,7 @@ "store_params": { "num_retries": 80 }, - "variable_open_params": { + "data_params": { "CHL": ["some_parameter"] } } diff --git a/dataset-spec/template.schema.json b/dataset-spec/template.schema.json index 0945fd4..c95ec42 100644 --- a/dataset-spec/template.schema.json +++ b/dataset-spec/template.schema.json @@ -253,75 +253,111 @@ "description": "Array of data source definitions", "items": { "type": "object", - "additionalProperties": false, - "required": [ - "name", - "variable_names", - "store_id" - ], - "dependentRequired": { - "variable_names": ["store_id"], - "store_id": ["variable_names"], - "variables": ["download_url"], - "download_url": ["variables"] - }, - "properties": { - "name": { - "type": "string", - "description": "human-readable title of this data source", - "examples": ["Sentinel Hub S2L2A"] - }, - "variable_names": { - "type": "array", - "description": "array of variable names provided by this data store", - "examples": [["B06", "SCL"]] - }, - "store_id": { - "type": "string", - "description": "xcube data store identifier", - "examples": ["sentinelhub"] - }, - "store_params": 
{ - "type": "object", - "description": "xcube parameters used to open the data store", - "examples": [{"num_retries": 80}] - }, - "data_params": { - "type": "object", - "description": "xcube parameters used to open data from the data store" - }, - "variables": { - "type": "object", - "description": "mapping of variable names to variable sources" - }, - "download_url": { - "type": "string", - "description": "download URL template" - }, - "protocol": { - "type": "string", - "description": "transport protocol; detected from download_url if omitted", - "examples": ["http", "ftp"] - }, - "compressed": { - "type": "boolean", - "default": false, - "description": "whether data is compressed; detected from data if omitted" - }, - "compression_format": { - "type": "string", - "description": "compression format in use; detected from data if omitted" - }, - "source_format": { - "type": "string", - "description": "source format name; detected from filename if omitted", - "examples": ["netcdf", "hdf", "geotiff"] + "oneOf": [ + { + "properties": { + "name": { + "type": "string", + "description": "human-readable title of this data source", + "examples": [ + "Sentinel Hub S2L2A" + ] + }, + "variable_names": { + "type": "array", + "items": {"type": "string"}, + "description": "array of variable names provided by this data store", + "examples": [ + [ + "B06", + "SCL" + ] + ] + }, + "store_id": { + "type": "string", + "description": "xcube data store identifier", + "examples": [ + "sentinelhub" + ] + }, + "store_params": { + "type": "object", + "description": "xcube parameters used to open the data store", + "examples": [ + { + "num_retries": 80 + } + ] + }, + "data_params": { + "type": "object", + "description": "xcube parameters used to open data from the data store" + } + }, + "additionalProperties": false, + "required": [ + "name", + "variable_names", + "store_id" + ] }, - "source_crs": { - "type": "string", - "description": "coordinate reference system of source data; detected 
from data if omitted" + { + "properties": { + "name": { + "type": "string", + "description": "human-readable title of this data source", + "examples": [ + "Sentinel Hub S2L2A" + ] + }, + "variables": { + "type": "object", + "description": "mapping of variable names to variable sources" + }, + "download_url": { + "type": "string", + "description": "download URL template" + }, + "protocol": { + "type": "string", + "description": "transport protocol; detected from download_url if omitted", + "examples": [ + "http", + "ftp" + ] + }, + "compressed": { + "type": "boolean", + "default": false, + "description": "whether data is compressed; detected from data if omitted" + }, + "compression_format": { + "type": "string", + "description": "compression format in use; detected from data if omitted" + }, + "source_format": { + "type": "string", + "description": "source format name; detected from filename if omitted", + "examples": [ + "netcdf", + "hdf", + "geotiff" + ] + }, + "source_crs": { + "type": "string", + "description": "coordinate reference system of source data; detected from data if omitted" + } + }, + "additionalProperties": false, + "required": [ + "name", + "variables", + "download_url" + ] } - } + ] } } } From 18110f4a9661df32ce68ae32de498b332c66d505 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 16 Feb 2023 17:32:06 +0100 Subject: [PATCH 13/13] Minor updates to spec and schema --- dataset-spec/README.md | 17 ++++++++++++----- dataset-spec/template.schema.json | 2 +- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/dataset-spec/README.md b/dataset-spec/README.md index 53cfda9..41255f0 100644 --- a/dataset-spec/README.md +++ b/dataset-spec/README.md @@ -158,11 +158,11 @@ data cube: | Property | Type | Req.? | Description | Example | |------------|--------------------|:-----:|---------------------------------------------------------|--------------------------------| -| name | string | Y | Array of variable definitions. 
| `"CHL"` | -| long_name | string | Y | Array of variable definitions. | `"Chlorophyll Concentration"` | +| name | string | Y | Name of the variable in the dataset. | `"CHL"` | +| long_name | string | Y | Descriptive name of variable, per CF conventions. | `"Chlorophyll Concentration"` | | dtype | string | Y | Numpy-compatible data type name. | `float32"` | | dims | string[] | Y | Array of dimension names. | `["time", "lat", "lon"]"` | -| units | string | null | N | Physical unit. | `"mg/m^3"`, `"n.a."` | +| units | string | null | Y | Physical unit (null if not applicable). | `"mg/m^3"`, `"n.a."` | | fill_value | number | null | N | Unscaled values equal to `fill_value` are undefined. | 1.0 | | valid_min | number | N | Values below that number are undefined. | 0.0 | | valid_max | number | N | Values above that number are undefined. | 1.0 | @@ -174,11 +174,18 @@ letter and should be continued either by letters, digits, or the underscore (`_`) character. Spaces (' '), hyphens (`-`), and other characters should be avoided. +The `long_name` corresponds to the [`long_name` attribute](https://cfconventions.org/Data/cf-conventions/cf-conventions-1.10/cf-conventions.html#long-name) +defined in the Climate and Forecast (CF) Metadata Conventions, where it is +described as ‘a long descriptive name which may, for example, be used for +labeling plots.’ + If `fill_value` is missing or `null` and `dtype` is a floating point data, `NaN` is assumed. -`units` should not be specified or set to set to `null` for data that has -no units, e.g., categorical data. +`units` is required despite not always being applicable, in order to reduce the +risk of its omission in cases where it *is* applicable. +For data that has no units (e.g. categorical data), `units` should be set to +`null`. `time_range` defaults to the dataset's `time_range`. It should not be specified for variables that have no "time" dimension. 
diff --git a/dataset-spec/template.schema.json b/dataset-spec/template.schema.json index c95ec42..ddf90c5 100644 --- a/dataset-spec/template.schema.json +++ b/dataset-spec/template.schema.json @@ -187,7 +187,7 @@ ["time", "lat", "lon"] ] }, - "units": {"type": "string"}, + "units": {"type": ["string", "null"]}, "fill_value": {"type": "number"}, "valid_min": {"type": "number"}, "valid_max": {"type": "number"},