diff --git a/dataset-spec/README.md b/dataset-spec/README.md index c9b0558..41255f0 100644 --- a/dataset-spec/README.md +++ b/dataset-spec/README.md @@ -158,11 +158,11 @@ data cube: | Property | Type | Req.? | Description | Example | |------------|--------------------|:-----:|---------------------------------------------------------|--------------------------------| -| name | string | Y | Array of variable definitions. | `"CHL"` | -| long_name | string | Y | Array of variable definitions. | `"Chlorophyll Concentration"` | +| name | string | Y | Name of the variable in the dataset. | `"CHL"` | +| long_name | string | Y | Descriptive name of variable, per CF conventions. | `"Chlorophyll Concentration"` | | dtype | string | Y | Numpy-compatible data type name. | `float32"` | | dims | string[] | Y | Array of dimension names. | `["time", "lat", "lon"]"` | -| units | string | null | Y | Physical unit. | `"mg/m^3"`, `"n.a."` | +| units | string | null | Y | Physical unit (null if not applicable). | `"mg/m^3"`, `"n.a."` | | fill_value | number | null | N | Unscaled values equal to `fill_value` are undefined. | 1.0 | | valid_min | number | N | Values below that number are undefined. | 0.0 | | valid_max | number | N | Values above that number are undefined. | 1.0 | @@ -174,11 +174,18 @@ letter and should be continued either by letters, digits, or the underscore (`_`) character. Spaces (' '), hyphens (`-`), and other characters should be avoided. +The `long_name` corresponds to the [`long_name` attribute](https://cfconventions.org/Data/cf-conventions/cf-conventions-1.10/cf-conventions.html#long-name) +defined in the Climate and Forecast (CF) Metadata Conventions, where it is +described as ‘a long descriptive name which may, for example, be used for +labeling plots.’ + If `fill_value` is missing or `null` and `dtype` is a floating point data, `NaN` is assumed. -`units` should not be specified or set to set to `null` for data that has -no units, e.g., categorical data. 
+`units` is required despite not always being applicable, in order to reduce the +risk of its omission in cases where it *is* applicable. +For data that has no units (e.g. categorical data), `units` should be set to +`null`. `time_range` defaults to the dataset's `time_range`. It should not be specified for variables that have no "time" dimension. @@ -222,13 +229,13 @@ data files in a described source, usually by some URL or set of URLs. The following properties describe access to a xcube data store: -| Property | Type | Req.? | Description | Example | -|----------------------|--------|:-----:|------------------------------------------------------|------------------------| -| name | string | Y | Human-readable title of this data source. | `"Sentinel Hub S2L2A"` | -| variable_names | string | Y | Array of variable names provided by this data store. | `["B06", "SCL"]` | -| store_id | string | Y | xcube data store identifier. | `"sentinelhub"` | -| store_params | object | N | xcube data store parameters. | `{"num_retries": 80}` | -| variable_open_params | object | N | xcube data store open parameters. | | +| Property | Type | Req.? | Description | Example | +|----------------|--------|:-----:|------------------------------------------------------|------------------------| +| name | string | Y | Human-readable title of this data source. | `"Sentinel Hub S2L2A"` | +| variable_names | string | Y | Array of variable names provided by this data store. | `["B06", "SCL"]` | +| store_id | string | Y | xcube data store identifier. | `"sentinelhub"` | +| store_params | object | N | xcube data store parameters. | `{"num_retries": 80}` | +| data_params | object | N | xcube data store open parameters. | | `variable_open_params` is an object that maps variable names to specific open parameters passed to the given data store. 
diff --git a/dataset-spec/template.geojson b/dataset-spec/template.geojson index 9c1aa66..3a52a4f 100644 --- a/dataset-spec/template.geojson +++ b/dataset-spec/template.geojson @@ -1,49 +1,78 @@ { "type": "Feature", "properties": { - "title": "Ocean Color Products in the Mediterranean Sea", "data_id": "OC-CMEMS-MED-005-2005-2022-1d-v1", + "title": "Ocean Color Products in the Mediterranean Sea", "description": "CMEMS Ocean Colour for Mediterranean Sea, 2005-2022", - "time_range": ["2005-05-01", "2022-05-01"], - "time_period": "1D", - "spatial_ref": "CRS84", - "spatial_bbox": [-6.75, 30, 36.5, 46], - "spatial_res": 0.05, - "metadata": { - "published_on": "2022-10-26", - "keywords": ["mediterranean", "ocean colour", "chlorophyll"] - }, + "version": "1.0.3", + "recipe": "https://example.com/recipe/", + "changes": "https://example.com/recipe/CHANGES.md", + "license": "Creative Commons Attribution 4.0 International (CC BY 4.0)", "variables": [ { "name": "CHL", - "units": "mg/m^3", "long_name": "Chlorophyll Concentration", "dtype": "float32", + "dims": ["time", "lat", "lon"], + "units": "mg/m^3", + "fill_value": -1.0, "valid_min": 0.0, "valid_max": 1000.0, - "color_bar_name": "YlGn", "time_range": ["2005-05-01", "2022-05-01"], - "dims": ["time", "lat", "lon"], - "sources": [ - { - "home_url": "https://resources.marine.copernicus.eu/", - "data_url": "https://resources.marine.copernicus.eu/", - "license_url": "https://marine.copernicus.eu/user-corner/service-commitments-and-licence", - "attributions": [], - "processing_steps": [ - "Masking out low quality pixels", - "Temporal NaN-mean aggregation", - "Spatial bi-cubic interpolation" - ], - "remarks": "" - } - ], "metadata": { + "color_bar_name": "YlGn", + "color_value_min": 0.5, + "color_value_max": 150, "keywords": ["colour", "marine", "chlorophyll"], "processing-level": "L4" } } - ] + ], + "sources": [ + { + "name": "Sentinel Hub S2L2A", + "variable_names": ["CHL"], + "store_id": "sentinelhub", + "store_params": { + 
"num_retries": 80 + }, + "data_params": { + "CHL": ["some_parameter"] + } + } + ], + "metadata": { + "Conventions": "CF-1.9", + "acknowledgment": "EO4SIBS, CMEMS, DeepESDL project", + "contributor_name": "Brockmann Geomatics Sweden AB", + "contributor_url": "www.brockmann-geomatics.se", + "creator_email": "info@brockmann-consult.de", + "creator_name": "Brockmann Consult GmbH", + "creator_url": "www.brockmann-consult.de", + "institution": "Brockmann Consult GmbH", + "project": "DeepESDL", + "publisher_email": "info@brockmann-consult.de", + "publisher_name": "Brockmann Consult GmbH", + "license_url": "https://creativecommons.org/licenses/by/4.0/", + "source": "EO4SIBS, CMEMS", + "date_modified": "2022-08-19 16:19:15.359970", + "geospatial_lon_min": -6.75, + "geospatial_lon_max": 36.5, + "geospatial_lat_min": 30.0, + "geospatial_lat_max": 46.0, + "geospatial_lon_resolution": 0.05, + "geospatial_lat_resolution": 0.05, + "temporal_coverage_start": "2005-05-01 00:00:00", + "temporal_coverage_end": "2022-05-01 23:59:59", + "temporal_resolution": "1D" + }, + "time_range": ["2005-05-01", "2022-05-01"], + "time_period": "1D", + "spatial_ref": "CRS84", + "spatial_bbox": [-6.75, 30, 36.5, 46], + "spatial_size": [865, 320], + "spatial_offset": [0, 0], + "spatial_res": [0.05, 0.05] }, "geometry": { "type": "Polygon", diff --git a/dataset-spec/template.schema.json b/dataset-spec/template.schema.json index 125f9fd..ddf90c5 100644 --- a/dataset-spec/template.schema.json +++ b/dataset-spec/template.schema.json @@ -15,39 +15,68 @@ "properties": { "type": "object", "required": [ - "title", "data_id", + "title", "description", - "time_range", - "time_period", - "spatial_ref", - "spatial_bbox", - "spatial_res", - "variables" + "version", + "license", + "variables", + "sources" ], "additionalProperties": false, "properties": { - "title": { + "data_id": { "type": "string", - "description": "the title of the dataset", + "description": "unique identification string, filename base", 
"examples": [ - "Ocean Products in the Mediterranean Sea" + "OC-CMEMS-MED-005-2005-2022-1d-v1" ] }, - "data_id": { + "title": { "type": "string", - "description": "a unique dataset identifier", + "description": "human-readable title of the dataset", "examples": [ - "OC-CMEMS-MED-005-2005-2022-1d-v1.zarr" + "Ocean Products in the Mediterranean Sea" ] }, "description": { "type": "string", - "description": "a description of the dataset", + "description": "human-readable description of the dataset", "examples": [ "CMEMS Ocean Colour for Mediterranean Sea, 2005-2022" ] }, + "version": { + "type": "string", + "description": "Version string. Semver format recommended.", + "examples": [ + "1.0.3" + ] + }, + "recipe": { + "type": "string", + "description": "URL to repo recipe directory", + "examples": [ + "https://example.com/recipe/" + ] + }, + "changes": { + "type": "string", + "description": "URL to CHANGES.md in recipe directory", + "examples": [ + "https://example.com/recipe/CHANGES.md" + ] + }, + "license": { + "type": "string", + "description": "name of the license under which the dataset is published", + "examples": [ + "Creative Commons Attribution 4.0 International (CC BY 4.0)" + ] + }, + "metadata": { + "type": "object" + }, "time_range": { "type": "array", "prefixItems": [ @@ -61,7 +90,7 @@ ] }, "time_period": { - "type": "string", + "type": ["string", "null"], "examples": ["1D", "2D", "8D", "10D", "2W", "1M"] }, "spatial_ref": { @@ -103,8 +132,35 @@ "description": "spatial resolution in units of the spatial coordinate reference system", "examples": [0.05, 10, 300] }, - "metadata": { - "type": "object" + "spatial_size": { + "type": ["array"], + "prefixItems": [ + { + "type": "integer", + "minimum": 2 + }, + { + "type": "integer", + "minimum": 2 + } + ], + "items": false, + "description": "the spatial image size as [width, height] in pixels", + "examples": [[6000, 3000], [256, 256]] + }, + "spatial_offset": { + "type": ["array"], + "prefixItems": [ + { + "type": "number" + }, + { + "type": "number" + } 
+ ], + "items": false, + "description": "Offset coordinates in units of the CRS", + "examples": [[-180, -90], [0, 0]] }, "variables": { "type": "array", @@ -116,18 +172,23 @@ "required": [ "name", "long_name", - "units", - "valid_min", - "valid_max", - "time_range", "dtype", - "dims", - "sources" + "dims" ], "properties": { "name": {"type": "string"}, "long_name": {"type": "string"}, - "units": {"type": "string"}, + "dtype": {"type": "string"}, + "dims": { + "type": "array", + "items": {"type": "string"}, + "examples": [ + ["time", "y", "x"], + ["time", "lat", "lon"] + ] + }, + "units": {"type": ["string", "null"]}, + "fill_value": {"type": "number"}, "valid_min": {"type": "number"}, "valid_max": {"type": "number"}, "time_range": { @@ -142,91 +203,162 @@ ], "items": false }, - "dtype": {"type": "string"}, - "dims": { - "type": "array", - "items": {"type": "string"}, - "examples": [ - ["time", "y", "x"], - ["time", "lat", "lon"] - ] - }, - "sources": { - "type": "array", - "minItems": 1, - "items": { - "type": "object", - "additionalProperties": true, - "required": [ - "data_url", - "license_url" - ], - "properties": { - "home_url": { - "type": "string", - "format": "uri", - "examples": [ - ["https://resources.marine.copernicus.eu/"] - ] - }, - "data_url": { - "type": "string", - "format": "uri", - "examples": [ - ["https://resources.marine.copernicus.eu/"] - ] - }, - "license_url": { - "type": "string", - "format": "uri", - "examples": [ - ["https://marine.copernicus.eu/user-corner/service-commitments-and-licence"] - ] - }, - "attributions": { - "type": "array", - "items": {"type": "string"} - }, - "processing_steps": { - "type": "array", - "description": "processing steps to be applied to the source", - "items": {"type": "string"}, - "examples": [ - [ - "Masking out low quality pixels", - "Temporal NaN-mean aggregation", - "Spatial bi-cubic interpolation" - ] + "metadata": { + "type": "object", + "required": [ + "color_bar_name", + "color_value_min", + 
"color_value_max" + ], + "properties": { + "color_bar_name": { + "type": "string", + "description": "name of a matplotlib colormap: https://matplotlib.org/stable/tutorials/colors/colormaps.html", + "examples": [ + [ + "viridis_r", + "turbo", + "prism", + "Spectral" ] - }, - "remarks": { - "type": "string" - } + ] + }, + "color_value_min": { + "type": "number", + "description": "minimum value for colour scale", + "examples": [ + -10500, + 0, + 1.735e-9, + 4.5e6 + ] + }, + "color_value_max": { + "type": "number", + "description": "maximum value for colour scale", + "examples": [ + -9500, + 0, + 2.978e-9, + 8.5e6 + ] } } - }, - "color_bar_name": { - "type": "string", - "description": "name of a matplotlib colormap: https://matplotlib.org/stable/tutorials/colors/colormaps.html", - "examples": [ - ["viridis_r", "turbo", "prism", "Spectral"] - ] - }, - "color_value_min": { - "type": "number", - "description": "minimum value for colour scale", - "examples": [-10500, 0, 1.735e-9, 4.5e6] - }, - "color_value_max": { - "type": "number", - "description": "maximum value for colour scale", - "examples": [-9500, 0, 2.978e-9, 8.5e6] - }, - "metadata": { - "type": "object" } } } + }, + "sources": { + "type": "array", + "description": "Array of data source definitions", + "items": { + "type": "object", + "oneOf": [ + { + "properties": { + "name": { + "type": "string", + "description": "human-readable title of this data source", + "examples": [ + "Sentinel Hub S2L2A" + ] + }, + "variable_names": { + "type": "array", + "items": {"type": "string"}, + "description": "array of variable names provided by this data store", + "examples": [ + [ + "B06", + "SCL" + ] + ] + }, + "store_id": { + "type": "string", + "description": "xcube data store identifier", + "examples": [ + "sentinelhub" + ] + }, + "store_params": { + "type": "object", + "description": "xcube parameters used to open the data store", + "examples": [ + { + "num_retries": 80 + } + ] + }, + "data_params": { + "type": "object", + 
"description": "xcube parameters used to open data from the data store" + } + }, + "additionalProperties": false, + "required": [ + "name", + "variable_names", + "store_id" + ] + }, + { + "properties": { + "name": { + "type": "string", + "description": "human-readable title of this data source", + "examples": [ + "Sentinel Hub S2L2A" + ] + }, + "variables": { + "type": "object", + "description": "mapping of variable names to variable sources" + }, + "download_url": { + "type": "string", + "description": "download URL template" + }, + "protocol": { + "type": "string", + "description": "transport protocol; detected from download_url if omitted", + "examples": [ + "http", + "ftp" + ] + }, + "compressed": { + "type": "boolean", + "default": false, + "description": "whether data is compressed; detected from data if omitted" + }, + "compression_format": { + "type": "string", + "description": "compression format in use; detected from data if omitted" + }, + "source_format": { + "type": "string", + "description": "source format name; detected from filename if omitted", + "examples": [ + "netcdf", + "hdf", + "geotiff" + ] + }, + "source_crs": { + "type": "string", + "description": "coordinate reference system of source data; detected from data if omitted" + } + }, + "additionalProperties": false, + "required": [ + "name", + "variables", + "download_url" + ] + } + ] + } } } },