diff --git a/CHANGELOG.md b/CHANGELOG.md
index a480432..c52af0b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,19 +8,37 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased](https://github.com/crim-ca/mlm-extension/tree/main)
### Added
-- n/a
+- Add `raster:bands` required property `name` for describing `mlm:input` bands
+ (see [README - Bands and Statistics](README.md#bands-and-statistics) for details).
+- Add README warnings about new extension `eo` and `raster` versions.
### Changed
-- n/a
+- Split `ModelBands` and `AnyBandsRef` definitions in the JSON schema to allow them to be referenced individually.
+- Move `AnyBandsRef` definition explicitly to STAC Item JSON schema, rather than implicitly inferred via `mlm:input`.
+- Modified the JSON schema to use an `if` check of the `type` (STAC Item or Collection) prior to validating further
+ properties. This allows some validators (e.g. `pystac`) to better report the *real* error that causes the schema
+ to fail, rather than reporting the first mismatching `type` case with a poor error description to debug the issue.
### Deprecated
- n/a
### Removed
-- n/a
+- Removed `$comment` entries from the JSON schema that are considered as invalid by some parsers.
+- When `mlm:input` objects do **NOT** define band references (i.e.: `bands: []` is used), the JSON schema will not
+ fail if an Asset with the `mlm:model` role contains a band definition. This is to allow MLM model definitions to
+ simultaneously use some inputs with `bands` reference names while others do not.
### Fixed
-- n/a
+- Band checks against [`eo`](https://github.com/stac-extensions/eo), [`raster`](https://github.com/stac-extensions/raster)
+ or STAC Core 1.1 [`bands`](https://github.com/radiantearth/stac-spec/blob/master/commons/common-metadata.md#bands)
+ when a `mlm:input` references names in `bands` are now properly validated.
+- Fix the examples using `raster:bands` incorrectly defined in STAC Item properties.
+ The correct use is for them to be defined under the STAC Asset using the `mlm:model` role.
+- Fix the [EuroSAT ResNet pydantic example](./stac_model/examples.py) that incorrectly referenced some `bands`
+ in its `mlm:input` definition without providing any definition of those bands. The `eo:bands` properties have
+ been added to the corresponding `model` Asset using
+ the [`pystac.extensions.eo`](https://github.com/stac-utils/pystac/blob/main/pystac/extensions/eo.py) utilities.
+- Fix various STAC Asset definitions erroneously employing `mlm:model` role instead of the intended `mlm:source_code`.
## [v1.2.0](https://github.com/crim-ca/mlm-extension/tree/v1.2.0)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 1fb68c6..a9adfdb 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -19,7 +19,7 @@ make install-dev
make pre-commit-install
```
-## PR submittion
+## PR submission
Before submitting your code please do the following steps:
@@ -41,7 +41,7 @@ make lint-all
make test
```
-5. Upload your changes to your fork, then make a PR from there to the main repo:
+6. Upload your changes to your fork, then make a PR from there to the main repo:
```bash
git checkout -b your-branch
@@ -53,11 +53,15 @@ git push -u origin your-branch
## Building and releasing
-> :warning:
+
+
+> [!WARNING]
> There are multiple types of releases for this repository:
> 1. Release for MLM specification (usually, this should include one for `stac-model` as well to support it)
> 2. Release for `stac-model` only
+
+
### Building a new version of MLM specification
- Checkout to the `main` branch by making sure the CI passed all previous tests.
@@ -69,9 +73,14 @@ git push -u origin your-branch
- Make a commit to `GitHub` and push the corresponding auto-generated `v{MAJOR}.{MINOR}.{PATCH}` tag.
- Validate that the CI validated everything once again.
- Create a `GitHub release` with the created tag.
- > :warning:
- > - Ensure the "Set as the latest release" option is selected :heavy_check_mark:.
- > - Ensure the diff ranges from the previous MLM version, and not an intermediate `stac-model` release.
+
+
+
+> [!WARNING]
+> - Ensure the "Set as the latest release" option is selected :heavy_check_mark:.
+> - Ensure the diff ranges from the previous MLM version, and not an intermediate `stac-model` release.
+
+
### Building a new version of `stac-model`
@@ -83,9 +92,14 @@ git push -u origin your-branch
- Checkout to `main` branch that contais the freshly created merge commit.
- Push the tag `stac-model-v{MAJOR}.{MINOR}.{PATCH}`. The CI should auto-publish it to PyPI.
- Create a `GitHub release`
- > :warning:
- > - Ensure the "Set as the latest release" option is deselected :x:.
- > - Ensure the diff ranges from the previous release of `stac-model`, not an intermediate MLM release.
+
+
+
+> [!WARNING]
+> - Ensure the "Set as the latest release" option is deselected :x:.
+> - Ensure the diff ranges from the previous release of `stac-model`, not an intermediate MLM release.
+
+
## Other help
@@ -93,7 +107,7 @@ You can contribute by spreading a word about this library.
It would also be a huge contribution to write
a short article on how you are using this project.
You can also share how the ML Model extension does or does
-not serve your needs with us in the Github Discussions or raise
+not serve your needs with us in the GitHub Discussions or raise
Issues for bugs.
[poetry-install]: https://github.com/python-poetry/install.python-poetry.org
diff --git a/Makefile b/Makefile
index 6c0fa19..599754d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,8 @@
#* Variables
-SHELL := /usr/bin/env bash
-PYTHON := python
+SHELL := /usr/bin/env bash
+PYTHON ?= python
PYTHONPATH := `pwd`
+POETRY ?= poetry
#* Poetry
.PHONY: poetry-install
@@ -14,36 +15,36 @@ poetry-remove:
.PHONY: poetry-plugins
poetry-plugins:
- poetry self add poetry-plugin-up
+ $(POETRY) self add poetry-plugin-up
.PHONY: poetry-env
poetry-env:
- poetry config virtualenvs.in-project true
+ $(POETRY) config virtualenvs.in-project true
.PHONY: publish
publish:
- poetry publish --build
+ $(POETRY) publish --build
#* Installation
.PHONY: install
install: poetry-env
- poetry lock -n && poetry export --without-hashes > requirements-lock.txt
- poetry install -n
+ $(POETRY) lock -n && $(POETRY) export --without-hashes > requirements-lock.txt
+ $(POETRY) install -n
-poetry run mypy --install-types --non-interactive ./
.PHONY: install-dev
install-dev: poetry-env install
- poetry install -n --with dev
+ $(POETRY) install -n --with dev
.PHONY: pre-commit-install
pre-commit-install:
- poetry run pre-commit install
+ $(POETRY) run pre-commit install
#* Formatters
.PHONY: codestyle
codestyle:
- poetry run ruff format --config=pyproject.toml stac_model tests
+ $(POETRY) run ruff format --config=pyproject.toml stac_model tests
.PHONY: format
format: codestyle
@@ -61,29 +62,29 @@ check-all: check
.PHONY: mypy
mypy:
- poetry run mypy --config-file pyproject.toml ./
+ $(POETRY) run mypy --config-file pyproject.toml ./
.PHONY: check-mypy
check-mypy: mypy
.PHONY: check-safety
check-safety:
- poetry check
- poetry run safety check --full-report
- poetry run bandit -ll --recursive stac_model tests
+ $(POETRY) check
+ $(POETRY) run safety check --full-report
+ $(POETRY) run bandit -ll --recursive stac_model tests
.PHONY: lint
lint:
- poetry run ruff --config=pyproject.toml ./
- poetry run pydocstyle --count --config=pyproject.toml ./
- poetry run pydoclint --config=pyproject.toml ./
+ $(POETRY) run ruff --config=pyproject.toml ./
+ $(POETRY) run pydocstyle --count --config=pyproject.toml ./
+ $(POETRY) run pydoclint --config=pyproject.toml ./
.PHONY: check-lint
check-lint: lint
.PHONY: format-lint
format-lint:
- poetry run ruff --config=pyproject.toml --fix ./
+ $(POETRY) run ruff --config=pyproject.toml --fix ./
.PHONY: install-npm
install-npm:
@@ -113,7 +114,7 @@ lint-all: lint mypy check-safety check-markdown
.PHONY: update-dev-deps
update-dev-deps:
- poetry up --only=dev-dependencies --latest
+ $(POETRY) up --only=dev-dependencies --latest
#* Cleaning
.PHONY: pycache-remove
diff --git a/README.md b/README.md
index 99f2159..f20587c 100644
--- a/README.md
+++ b/README.md
@@ -224,7 +224,9 @@ It is recommended to define `accelerator` with one of the following values:
- `intel-ipex-gpu` for models optimized with IPEX for Intel GPUs
- `macos-arm` for models trained on Apple Silicon
-> :warning:
+
+
+> [!WARNING]
> If `mlm:accelerator = amd64`, this explicitly indicates that the model does not (and will not try to) use any
> accelerator, even if some are available from the runtime environment. This is to be distinguished from
> the value `mlm:accelerator = null`, which means that the model *could* make use of some accelerators if provided,
@@ -232,6 +234,8 @@ It is recommended to define `accelerator` with one of the following values:
> using `mlm:accelerator = amd64` also set explicitly `mlm:accelerator_constrained = true` to illustrate that the
> model **WILL NOT** use accelerators, although the hardware resolution should be identical nonetheless.
+
+
When `mlm:accelerator = null` is employed, the value of `mlm:accelerator_constrained` can be ignored, since even if
set to `true`, there would be no `accelerator` to contain against. To avoid confusion, it is suggested to set the
`mlm:accelerator_constrained = false` or omit the field entirely in this case.
@@ -265,7 +269,15 @@ representing bands information, including notably the `nodata` value,
the `data_type` (see also [Data Type Enum](#data-type-enum)),
and [Common Band Names][stac-band-names].
-> :information_source:
+
+
+> [!WARNING]
+> Only versions `v1.x` of `eo` and `raster` are supported to provide `mlm:input` band references.
+> Versions `v2.x` of those extensions rely on the [STAC 1.1 - Band Object][stac-1.1-band] instead.
+> If those versions are desired, consider migrating your MLM definition to use [STAC 1.1 - Band Object][stac-1.1-band]
+> as well for referencing `mlm:input` with band names.
+
+> [!NOTE]
> Due to how the schema for [`eo:bands`][stac-eo-band] is defined, it is not sufficient to *only* provide
> the `eo:bands` property at the STAC Item level. The schema validation of the EO extension explicitly looks
> for a corresponding set of bands under an Asset, and if none is found, it disallows `eo:bands` in the Item properties.
@@ -273,12 +285,21 @@ and [Common Band Names][stac-band-names].
> (see [Model Asset](#model-asset)), or define them *both* under the Asset and Item properties. If the second
> approach is selected, it is recommended that the `eo:bands` under the Asset contains only the `name` or the
> `common_name` property, such that all other details about the bands are defined at the Item level.
+> An example of such representation is provided in
+> [examples/item_eo_bands_summarized.json](examples/item_eo_bands_summarized.json).
+>
+> For an example where `eo:bands` are entirely defined in the Asset on their own, please refer to
+> [examples/item_eo_bands.json](examples/item_eo_bands.json) instead.
>
> For more details, refer to [stac-extensions/eo#12](https://github.com/stac-extensions/eo/issues/12).
>
-> For an example, please refer to [examples/item_eo_bands.json](examples/item_eo_bands.json).
-> Notably in this example, the `assets.weights.eo:bands` property provides the `name` to fulfill the Asset requirement,
-> while all additional band details are provided in `properties.eo:bands`.
+
+> [!NOTE]
+> When using `raster:bands`, an additional `name` parameter **MUST** be provided for each band. This parameter
+> is not defined in the `raster` extension itself, but is permitted. This addition is required to ensure
+> that `mlm:input` bands referenced by name can be associated to their respective `raster:bands` definitions.
+
+
Only bands used as input to the model should be included in the MLM `bands` field.
To avoid duplicating the information, MLM only uses the `name` of whichever "Band Object" is defined in the STAC Item.
@@ -294,12 +315,12 @@ to normalize all bands, rather than normalizing the values over a single product
applied differently for distinct [Model Input](#model-input-object) definitions, in order to adjust for intrinsic
properties of the model.
-[stac-1.1-band]: https://github.com/radiantearth/stac-spec/pull/1254
-[stac-1.1-stats]: https://github.com/radiantearth/stac-spec/blob/bands/item-spec/common-metadata.md#statistics-object
-[stac-eo-band]: https://github.com/stac-extensions/eo?tab=readme-ov-file#band-object
-[stac-raster-band]: https://github.com/stac-extensions/raster?tab=readme-ov-file#raster-band-object
-[stac-raster-stats]: https://github.com/stac-extensions/raster?tab=readme-ov-file#statistics-object
-[stac-band-names]: https://github.com/stac-extensions/eo?tab=readme-ov-file#common-band-names
+[stac-1.1-band]: https://github.com/radiantearth/stac-spec/blob/v1.1.0/commons/common-metadata.md#bands
+[stac-1.1-stats]: https://github.com/radiantearth/stac-spec/blob/v1.1.0/commons/common-metadata.md#statistics-object
+[stac-eo-band]: https://github.com/stac-extensions/eo/tree/v1.1.0#band-object
+[stac-raster-band]: https://github.com/stac-extensions/raster/tree/v1.1.0#raster-band-object
+[stac-raster-stats]: https://github.com/stac-extensions/raster/tree/v1.1.0#statistics-object
+[stac-band-names]: https://github.com/stac-extensions/eo#common-band-names
#### Model Band Object
@@ -309,10 +330,14 @@ properties of the model.
| format | string | The type of expression that is specified in the `expression` property. |
| expression | \* | An expression compliant with the `format` specified. The expression can be applied to any data type and depends on the `format` given. |
-> :information_source:
+
+
+> [!NOTE]
> Although `format` and `expression` are not required in this context, they are mutually dependent on each other.
> See also [Processing Expression](#processing-expression) for more details and examples.
+
+
The `format` and `expression` properties can serve multiple purpose.
1. Applying a band-specific pre-processing step,
@@ -441,14 +466,18 @@ the following formats are recommended as alternative scripts and function refere
| `docker` | string | An URI with image and tag to a Docker. | `ghcr.io/NAMESPACE/IMAGE_NAME:latest` |
| `uri` | string | An URI to some binary or script. | `{"href": "https://raw.githubusercontent.com/ORG/REPO/TAG/package/cli.py", "type": "text/x-python"}` |
-> :information_source:
+
+
+> [!NOTE]
> Above definitions are only indicative, and more can be added as desired with even more custom definitions.
> It is left as an implementation detail for users to resolve how these expressions should be handled at runtime.
-> :warning:
+> [!WARNING]
> See also discussion regarding additional processing expressions:
> [stac-extensions/processing#31](https://github.com/stac-extensions/processing/issues/31)
+
+
[stac-proc-expr]: https://github.com/stac-extensions/processing#expression-object
### Model Output Object
@@ -543,10 +572,14 @@ In order to provide more context, the following roles are also recommended were
| mlm:model | `model` | Required role for [Model Asset](#model-asset). |
| mlm:source_code | `code` | Required role for [Model Asset](#source-code-asset). |
-> :information_source:
+
+
+> [!NOTE]
> (*) These roles are offered as direct conversions from the previous extension
> that provided [ML-Model Asset Roles][ml-model-asset-roles] to provide easier upgrade to the MLM extension.
+
+
[ml-model-asset-roles]: https://github.com/stac-extensions/ml-model?tab=readme-ov-file#asset-objects
### Model Asset
diff --git a/README_DLM_LEGACY.md b/README_DLM_LEGACY.md
index fbc815a..058ed45 100644
--- a/README_DLM_LEGACY.md
+++ b/README_DLM_LEGACY.md
@@ -1,9 +1,13 @@
# Deep Learning Model (DLM) Extension
-> :information_source:
+
+
+> [!NOTE]
> This is legacy documentation references of Deep Learning Model extension
> preceding the current Machine Learning Model (MLM) extension.
+
+
Check the original [Technical Report](https://github.com/crim-ca/CCCOT03/raw/main/CCCOT03_Rapport%20Final_FINAL_EN.pdf).
![Image Description](https://i.imgur.com/cVAg5sA.png)
diff --git a/best-practices.md b/best-practices.md
index fc595bf..9d94b35 100644
--- a/best-practices.md
+++ b/best-practices.md
@@ -41,7 +41,9 @@ could include the bbox of "the world" `[-90, -180, 90, 180]` and the `start_date
would ideally be generic values like `["1900-01-01T00:00:00Z", null]` (see warning below).
However, due to limitations with the STAC 1.0 specification, this time extent is not applicable.
-> :warning:
+
+
+> [!WARNING]
> The `null` value is not allowed for datetime specification.
> As a workaround, the `end_datetime` can be set with a "very large value"
> (similarly to `start_datetime` set with a small value), such as `"9999-12-31T23:59:59Z"`.
@@ -49,6 +51,8 @@ However, due to limitations with the STAC 1.0 specification, this time extent is
>
> For more details, see the following [discussion](https://github.com/radiantearth/stac-spec/issues/1268).
+
+
It is to be noted that generic and very broad spatiotemporal
extents like above rarely reflect the reality regarding the capabilities and precision of the model to predict reliable
results. If a more restrained area and time of interest can be identified, such as the ranges for which the training
@@ -187,9 +191,13 @@ MLM definition to indicate which class values can be contained in the resulting
For more details, see the [Model Output Object](README.md#model-output-object) definition.
-> :information_source:
+
+
+> [!NOTE]
> Update according to [stac-extensions/classification#48](https://github.com/stac-extensions/classification/issues/48).
+
+
### Scientific Extension
Provided that most models derive from previous scientific work, it is strongly recommended to employ the
@@ -252,9 +260,13 @@ inference strategies to apply a model should define the [Source Code Asset](READ
This code is in itself ideal to guide users how to run it, and should therefore be replicated as an `example` link
reference to offer more code samples to execute the model.
-> :information_source:
+
+
+> [!NOTE]
> Update according to [stac-extensions/example-links#4](https://github.com/stac-extensions/example-links/issues/4).
+
+
### Version Extension
In the even that a model is retrained with gradually added annotations or improved training strategies leading to
diff --git a/examples/item_eo_bands.json b/examples/item_eo_bands.json
index c937e92..0efd02c 100644
--- a/examples/item_eo_bands.json
+++ b/examples/item_eo_bands.json
@@ -374,7 +374,7 @@
"description": "Source code to run the model.",
"type": "text/x-python",
"roles": [
- "mlm:model",
+ "mlm:source_code",
"code",
"metadata"
]
diff --git a/examples/item_multi_io.json b/examples/item_multi_io.json
index fa6c46b..4922415 100644
--- a/examples/item_multi_io.json
+++ b/examples/item_multi_io.json
@@ -43,7 +43,7 @@
58.21798141355221
],
"properties": {
- "description": "Generic model that employs multiple input sources with different combination of bands.",
+ "description": "Generic model that employs multiple input sources with different combination of bands, and some inputs without any band at all.",
"datetime": null,
"start_datetime": "1900-01-01T00:00:00Z",
"end_datetime": "9999-12-31T23:59:59Z",
@@ -216,48 +216,6 @@
"expression": "logical_not(A)"
}
}
- ],
- "raster:bands": [
- {
- "name": "B02 - blue",
- "nodata": 0,
- "data_type": "uint16",
- "bits_per_sample": 15,
- "spatial_resolution": 10,
- "scale": 0.0001,
- "offset": 0,
- "unit": "m"
- },
- {
- "name": "B03 - green",
- "nodata": 0,
- "data_type": "uint16",
- "bits_per_sample": 15,
- "spatial_resolution": 10,
- "scale": 0.0001,
- "offset": 0,
- "unit": "m"
- },
- {
- "name": "B04 - red",
- "nodata": 0,
- "data_type": "uint16",
- "bits_per_sample": 15,
- "spatial_resolution": 10,
- "scale": 0.0001,
- "offset": 0,
- "unit": "m"
- },
- {
- "name": "B08 - nir",
- "nodata": 0,
- "data_type": "uint16",
- "bits_per_sample": 15,
- "spatial_resolution": 10,
- "scale": 0.0001,
- "offset": 0,
- "unit": "m"
- }
]
},
"assets": {
@@ -269,6 +227,48 @@
"roles": [
"mlm:model",
"mlm:weights"
+ ],
+ "raster:bands": [
+ {
+ "name": "B02 - blue",
+ "nodata": 0,
+ "data_type": "uint16",
+ "bits_per_sample": 15,
+ "spatial_resolution": 10,
+ "scale": 0.0001,
+ "offset": 0,
+ "unit": "m"
+ },
+ {
+ "name": "B03 - green",
+ "nodata": 0,
+ "data_type": "uint16",
+ "bits_per_sample": 15,
+ "spatial_resolution": 10,
+ "scale": 0.0001,
+ "offset": 0,
+ "unit": "m"
+ },
+ {
+ "name": "B04 - red",
+ "nodata": 0,
+ "data_type": "uint16",
+ "bits_per_sample": 15,
+ "spatial_resolution": 10,
+ "scale": 0.0001,
+ "offset": 0,
+ "unit": "m"
+ },
+ {
+ "name": "B08 - nir",
+ "nodata": 0,
+ "data_type": "uint16",
+ "bits_per_sample": 15,
+ "spatial_resolution": 10,
+ "scale": 0.0001,
+ "offset": 0,
+ "unit": "m"
+ }
]
}
},
diff --git a/examples/item_raster_bands.json b/examples/item_raster_bands.json
index 0677909..46334e9 100644
--- a/examples/item_raster_bands.json
+++ b/examples/item_raster_bands.json
@@ -204,138 +204,6 @@
],
"post_processing_function": null
}
- ],
- "raster:bands": [
- {
- "name": "B01",
- "nodata": 0,
- "data_type": "uint16",
- "bits_per_sample": 15,
- "spatial_resolution": 60,
- "scale": 0.0001,
- "offset": 0,
- "unit": "m"
- },
- {
- "name": "B02",
- "nodata": 0,
- "data_type": "uint16",
- "bits_per_sample": 15,
- "spatial_resolution": 10,
- "scale": 0.0001,
- "offset": 0,
- "unit": "m"
- },
- {
- "name": "B03",
- "nodata": 0,
- "data_type": "uint16",
- "bits_per_sample": 15,
- "spatial_resolution": 10,
- "scale": 0.0001,
- "offset": 0,
- "unit": "m"
- },
- {
- "name": "B04",
- "nodata": 0,
- "data_type": "uint16",
- "bits_per_sample": 15,
- "spatial_resolution": 10,
- "scale": 0.0001,
- "offset": 0,
- "unit": "m"
- },
- {
- "name": "B05",
- "nodata": 0,
- "data_type": "uint16",
- "bits_per_sample": 15,
- "spatial_resolution": 20,
- "scale": 0.0001,
- "offset": 0,
- "unit": "m"
- },
- {
- "name": "B06",
- "nodata": 0,
- "data_type": "uint16",
- "bits_per_sample": 15,
- "spatial_resolution": 20,
- "scale": 0.0001,
- "offset": 0,
- "unit": "m"
- },
- {
- "name": "B07",
- "nodata": 0,
- "data_type": "uint16",
- "bits_per_sample": 15,
- "spatial_resolution": 20,
- "scale": 0.0001,
- "offset": 0,
- "unit": "m"
- },
- {
- "name": "B08",
- "nodata": 0,
- "data_type": "uint16",
- "bits_per_sample": 15,
- "spatial_resolution": 10,
- "scale": 0.0001,
- "offset": 0,
- "unit": "m"
- },
- {
- "name": "B8A",
- "nodata": 0,
- "data_type": "uint16",
- "bits_per_sample": 15,
- "spatial_resolution": 20,
- "scale": 0.0001,
- "offset": 0,
- "unit": "m"
- },
- {
- "name": "B09",
- "nodata": 0,
- "data_type": "uint16",
- "bits_per_sample": 15,
- "spatial_resolution": 60,
- "scale": 0.0001,
- "offset": 0,
- "unit": "m"
- },
- {
- "name": "B10",
- "nodata": 0,
- "data_type": "uint16",
- "bits_per_sample": 15,
- "spatial_resolution": 60,
- "scale": 0.0001,
- "offset": 0,
- "unit": "m"
- },
- {
- "name": "B11",
- "nodata": 0,
- "data_type": "uint16",
- "bits_per_sample": 15,
- "spatial_resolution": 20,
- "scale": 0.0001,
- "offset": 0,
- "unit": "m"
- },
- {
- "name": "B12",
- "nodata": 0,
- "data_type": "uint16",
- "bits_per_sample": 15,
- "spatial_resolution": 20,
- "scale": 0.0001,
- "offset": 0,
- "unit": "m"
- }
]
},
"assets": {
@@ -347,6 +215,138 @@
"roles": [
"mlm:model",
"mlm:weights"
+ ],
+ "raster:bands": [
+ {
+ "name": "B01",
+ "nodata": 0,
+ "data_type": "uint16",
+ "bits_per_sample": 15,
+ "spatial_resolution": 60,
+ "scale": 0.0001,
+ "offset": 0,
+ "unit": "m"
+ },
+ {
+ "name": "B02",
+ "nodata": 0,
+ "data_type": "uint16",
+ "bits_per_sample": 15,
+ "spatial_resolution": 10,
+ "scale": 0.0001,
+ "offset": 0,
+ "unit": "m"
+ },
+ {
+ "name": "B03",
+ "nodata": 0,
+ "data_type": "uint16",
+ "bits_per_sample": 15,
+ "spatial_resolution": 10,
+ "scale": 0.0001,
+ "offset": 0,
+ "unit": "m"
+ },
+ {
+ "name": "B04",
+ "nodata": 0,
+ "data_type": "uint16",
+ "bits_per_sample": 15,
+ "spatial_resolution": 10,
+ "scale": 0.0001,
+ "offset": 0,
+ "unit": "m"
+ },
+ {
+ "name": "B05",
+ "nodata": 0,
+ "data_type": "uint16",
+ "bits_per_sample": 15,
+ "spatial_resolution": 20,
+ "scale": 0.0001,
+ "offset": 0,
+ "unit": "m"
+ },
+ {
+ "name": "B06",
+ "nodata": 0,
+ "data_type": "uint16",
+ "bits_per_sample": 15,
+ "spatial_resolution": 20,
+ "scale": 0.0001,
+ "offset": 0,
+ "unit": "m"
+ },
+ {
+ "name": "B07",
+ "nodata": 0,
+ "data_type": "uint16",
+ "bits_per_sample": 15,
+ "spatial_resolution": 20,
+ "scale": 0.0001,
+ "offset": 0,
+ "unit": "m"
+ },
+ {
+ "name": "B08",
+ "nodata": 0,
+ "data_type": "uint16",
+ "bits_per_sample": 15,
+ "spatial_resolution": 10,
+ "scale": 0.0001,
+ "offset": 0,
+ "unit": "m"
+ },
+ {
+ "name": "B8A",
+ "nodata": 0,
+ "data_type": "uint16",
+ "bits_per_sample": 15,
+ "spatial_resolution": 20,
+ "scale": 0.0001,
+ "offset": 0,
+ "unit": "m"
+ },
+ {
+ "name": "B09",
+ "nodata": 0,
+ "data_type": "uint16",
+ "bits_per_sample": 15,
+ "spatial_resolution": 60,
+ "scale": 0.0001,
+ "offset": 0,
+ "unit": "m"
+ },
+ {
+ "name": "B10",
+ "nodata": 0,
+ "data_type": "uint16",
+ "bits_per_sample": 15,
+ "spatial_resolution": 60,
+ "scale": 0.0001,
+ "offset": 0,
+ "unit": "m"
+ },
+ {
+ "name": "B11",
+ "nodata": 0,
+ "data_type": "uint16",
+ "bits_per_sample": 15,
+ "spatial_resolution": 20,
+ "scale": 0.0001,
+ "offset": 0,
+ "unit": "m"
+ },
+ {
+ "name": "B12",
+ "nodata": 0,
+ "data_type": "uint16",
+ "bits_per_sample": 15,
+ "spatial_resolution": 20,
+ "scale": 0.0001,
+ "offset": 0,
+ "unit": "m"
+ }
]
},
"source_code": {
@@ -355,7 +355,7 @@
"description": "Source code to run the model.",
"type": "text/x-python",
"roles": [
- "mlm:model",
+ "mlm:source_code",
"code",
"metadata"
]
diff --git a/json-schema/schema.json b/json-schema/schema.json
index 53a9486..c9ec4da 100644
--- a/json-schema/schema.json
+++ b/json-schema/schema.json
@@ -3,95 +3,114 @@
"$id": "https://crim-ca.github.io/mlm-extension/v1.2.0/schema.json",
"title": "Machine Learning Model STAC Extension Schema",
"description": "This object represents the metadata for a Machine Learning Model (MLM) used in STAC documents.",
- "oneOf": [
+ "$comment": "Use 'allOf+if/then' for each 'type' to allow implementations to report more specific messages about the exact case in error (if any). Using only a 'oneOf/allOf' with the 'type' caused any incompatible 'type' to be reported first with a minimal and poorly described error by 'pystac'.",
+ "allOf": [
{
"$comment": "This is the schema for STAC extension MLM in Items.",
- "allOf": [
- {
- "$comment": "Schema to validate the MLM fields under Item properties or Assets properties.",
- "type": "object",
- "required": [
- "type",
- "properties",
- "assets"
- ],
- "properties": {
- "type": {
- "const": "Feature"
- },
+ "if": {
+ "required": [
+ "type"
+ ],
+ "properties": {
+ "type": {
+ "const": "Feature"
+ }
+ }
+ },
+ "then": {
+ "allOf": [
+ {
+ "$comment": "Schema to validate the MLM fields under Item properties or Assets properties.",
+ "type": "object",
+ "required": [
+ "properties",
+ "assets"
+ ],
"properties": {
- "allOf": [
- {
- "required": [
- "mlm:name",
- "mlm:architecture",
- "mlm:tasks",
- "mlm:input",
- "mlm:output"
- ]
- },
- {
- "$ref": "#/$defs/fields"
- }
- ]
- },
- "assets": {
- "type": "object",
- "additionalProperties": {
+ "properties": {
"allOf": [
+ {
+ "required": [
+ "mlm:name",
+ "mlm:architecture",
+ "mlm:tasks",
+ "mlm:input",
+ "mlm:output"
+ ]
+ },
{
"$ref": "#/$defs/fields"
}
]
+ },
+ "assets": {
+ "type": "object",
+ "additionalProperties": {
+ "allOf": [
+ {
+ "$ref": "#/$defs/fields"
+ }
+ ]
+ }
}
}
+ },
+ {
+ "$ref": "#/$defs/stac_extensions_mlm"
+ },
+ {
+ "$comment": "Schema to validate cross-references of bands between MLM inputs and any 'bands'-compliant section describing them using another STAC definition.",
+ "$ref": "#/$defs/AnyBandsRef"
+ },
+ {
+ "$comment": "Schema to validate model role requirement.",
+ "$ref": "#/$defs/AssetModelRoleMinimumOneDefinition"
}
- },
- {
- "$ref": "#/$defs/stac_extensions_mlm"
- },
- {
- "$comment": "Schema to validate model role requirement.",
- "$ref": "#/$defs/AssetModelRoleMinimumOneDefinition"
- }
- ]
+ ]
+ }
},
{
"$comment": "This is the schema for STAC extension MLM in Collections.",
- "allOf": [
- {
- "type": "object",
- "required": [
- "type"
- ],
- "properties": {
- "type": {
- "const": "Collection"
- },
- "summaries": {
- "type": "object",
- "additionalProperties": {
- "$ref": "#/$defs/fields"
- }
- },
- "assets": {
- "type": "object",
- "additionalProperties": {
- "$ref": "#/$defs/fields"
- }
- },
- "item_assets": {
- "type": "object",
- "additionalProperties": {
- "$ref": "#/$defs/fields"
+ "if": {
+ "required": [
+ "type"
+ ],
+ "properties": {
+ "type": {
+ "const": "Collection"
+ }
+ }
+ },
+ "then": {
+ "allOf": [
+ {
+ "type": "object",
+ "properties": {
+ "summaries": {
+ "type": "object",
+ "additionalProperties": {
+ "$ref": "#/$defs/fields"
+ }
+ },
+ "assets": {
+ "type": "object",
+ "additionalProperties": {
+ "$ref": "#/$defs/fields"
+ }
+ },
+ "item_assets": {
+ "type": "object",
+ "additionalProperties": {
+ "$ref": "#/$defs/fields"
+ }
}
}
+ },
+ {
+ "$ref": "#/$defs/stac_extensions_mlm"
}
- },
- {
- "$ref": "#/$defs/stac_extensions_mlm"
- }
- ]
+ ]
+ }
}
],
"$defs": {
@@ -124,16 +143,50 @@
}
}
},
- "stac_extensions_eo_bands": {
- "required": ["eo:bands"],
- "$comment": "This is the JSON-object 'properties' definition for the STAC Item 'properties' field.",
+ "stac_extensions_eo_bands_item": {
+ "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties' containing 'eo:bands' as described in [https://github.com/stac-extensions/eo#item-properties-or-asset-fields].",
"properties": {
- "$comment": "https://github.com/stac-extensions/eo#item-properties-or-asset-fields",
- "eo:bands": {
- "type": "array",
- "minItems": 1,
- "items": {
- "type": "object"
+ "properties": {
+ "required": [
+ "eo:bands"
+ ],
+ "properties": {
+ "eo:bands": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "type": "object"
+ }
+ }
+ }
+ }
+ }
+ },
+ "stac_extensions_eo_bands_asset": {
+ "required": [
+ "assets"
+ ],
+ "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Asset containing 'eo:bands' as described in [https://github.com/stac-extensions/eo#item-properties-or-asset-fields].",
+ "properties": {
+ "assets": {
+ "additionalProperties": {
+ "if": {
+ "$ref": "#/$defs/AssetModelRole"
+ },
+ "then": {
+ "required": [
+ "eo:bands"
+ ],
+ "properties": {
+ "eo:bands": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "type": "object"
+ }
+ }
+ }
+ }
}
}
}
@@ -153,16 +206,41 @@
}
}
},
- "stac_extensions_raster_bands": {
- "required": ["raster:bands"],
- "$comment": "This is the JSON-object 'properties' definition for the STAC Item 'properties' field.",
+ "stac_extensions_raster_bands_asset": {
+ "required": [
+ "assets"
+ ],
+ "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item with at least one Asset field containing 'raster:bands' as described in [https://github.com/stac-extensions/raster/tree/v1.1.0#item-asset-fields].",
"properties": {
- "$comment": "https://github.com/stac-extensions/raster#item-asset-fields",
- "raster:bands": {
- "type": "array",
- "minItems": 1,
- "items": {
- "type": "object"
+ "assets": {
+ "additionalProperties": {
+ "if": {
+ "$ref": "#/$defs/AssetModelRole"
+ },
+ "then": {
+ "required": [
+ "raster:bands"
+ ],
+ "properties": {
+ "raster:bands": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "$comment": "Raster extension does not explicitly indicate a 'name', but one is needed for MLM.",
+ "type": "object",
+ "required": [
+ "name"
+ ],
+ "properties": {
+ "name": {
+ "type": "string",
+ "minLength": 1
+ }
+ }
+ }
+ }
+ }
+ }
}
}
}
@@ -327,13 +405,11 @@
"type": "boolean",
"$comment": "If trained from scratch, the source should be explicitly 'null'. However, omitting the source if pretrained is allowed.",
"if": {
- "$comment": "This is the JSON-object 'properties' definition.",
+ "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties'.",
"properties": {
- "$comment": "This is the STAC-Item 'properties' field.",
"properties": {
- "$comment": "This is the JSON-object 'properties' definition for the STAC Item 'properties' field.",
+ "$comment": "This is the JSON-object 'properties' definition for the STAC MLM pretraining reference.",
"properties": {
- "$comment": "Required MLM pretraining reference.",
"mlm:pretrained": {
"const": false
}
@@ -342,14 +418,14 @@
}
},
"then": {
- "$comment": "This is the JSON-object 'properties' definition.",
+ "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties'.",
"properties": {
- "$comment": "This is the STAC-Item 'properties' field.",
"properties": {
- "$comment": "This is the JSON-object 'properties' definition for the STAC Item 'properties' field.",
- "required": ["mlm:pretrained_source"],
+ "$comment": "This is the JSON-object 'properties' definition for the STAC MLM pretraining reference.",
+ "required": [
+ "mlm:pretrained_source"
+ ],
"properties": {
- "$comment": "Required MLM pretraining reference.",
"mlm:pretrained_source": {
"const": null
}
@@ -700,7 +776,9 @@
]
},
"AssetModelRole": {
- "required": ["roles"],
+ "required": [
+ "roles"
+ ],
"properties": {
"roles": {
"contains": {
@@ -712,69 +790,67 @@
},
"ModelBands": {
"description": "List of bands (if any) that compose the input. Band order represents the index position of the bands.",
- "allOf": [
- {
- "$comment": "No 'minItems' here to support model inputs not using any band (other data source).",
- "type": "array",
- "items": {
- "oneOf": [
- {
- "description": "Implied named-band with the name directly provided.",
+ "$comment": "No 'minItems' here to support model inputs not using any band (other data source).",
+ "type": "array",
+ "items": {
+ "oneOf": [
+ {
+ "description": "Implied named-band with the name directly provided.",
+ "type": "string",
+ "minLength": 1
+ },
+ {
+ "description": "Explicit named-band with optional derived expression to obtain it.",
+ "type": "object",
+ "required": [
+ "name"
+ ],
+ "properties": {
+ "name": {
"type": "string",
"minLength": 1
},
- {
- "description": "Explicit named-band with optional derived expression to obtain it.",
- "type": "object",
- "required": [
- "name"
- ],
- "properties": {
- "name": {
- "type": "string",
- "minLength": 1
- },
- "format": {
- "description": "Format to interpret the specified expression used to obtain the band.",
- "type": "string",
- "minLength": 1
- },
- "expression": {
- "description": "Any representation relevant for the specified 'format'."
- }
- },
- "dependencies": {
- "format": ["expression"],
- "expression": ["format"]
- },
- "additionalProperties": false
+ "format": {
+ "description": "Format to interpret the specified expression used to obtain the band.",
+ "type": "string",
+ "minLength": 1
+ },
+ "expression": {
+ "description": "Any representation relevant for the specified 'format'."
}
- ]
+ },
+ "dependencies": {
+ "format": [
+ "expression"
+ ],
+ "expression": [
+ "format"
+ ]
+ },
+ "additionalProperties": false
}
- },
- {
- "$comment": "However, if any band is indicated, a 'bands'-compliant section should describe them.",
- "$ref": "#/$defs/AnyBandsRef"
- }
- ]
+ ]
+ }
},
"AnyBandsRef": {
"$comment": "This definition ensures that, if at least 1 named MLM input 'bands' is provided, at least 1 of the supported references from EO, Raster or STAC Core 1.1 are provided as well. Otherwise, 'bands' must be explicitly empty.",
"if": {
"type": "object",
+ "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties'.",
"properties": {
- "$comment": "This is the STAC-Item 'properties' field.",
"properties": {
"type": "object",
"required": [
"mlm:input"
],
- "$comment": "This is the JSON-object 'properties' definition for the STAC Item 'properties' field.",
+ "$comment": "This is the JSON-object 'properties' definition for the MLM input with bands listing referring to at least one band name.",
"properties": {
- "$comment": "Required MLM bands listing referring to at least one band name.",
"mlm:input": {
"type": "array",
+ "$comment": "The 'minItems' below ensures that the band check does not fail for an explicitly empty 'mlm:input'.",
+ "minItems": 1,
"items": {
+ "type": "object",
"required": [
"bands"
],
@@ -801,24 +877,7 @@
"$ref": "#/$defs/stac_extensions_raster"
},
{
- "$comment": "This is the JSON-object 'properties' definition.",
- "properties": {
- "$comment": "This is the STAC-Item 'properties' field.",
- "properties": {
- "required": ["raster:bands"],
- "$comment": "This is the JSON-object 'properties' definition for the STAC Item 'properties' field.",
- "properties": {
- "$comment": "https://github.com/stac-extensions/raster#item-asset-fields",
- "raster:bands": {
- "type": "array",
- "minItems": 1,
- "items": {
- "type": "object"
- }
- }
- }
- }
- }
+ "$ref": "#/$defs/stac_extensions_raster_bands_asset"
}
]
},
@@ -829,31 +888,12 @@
"$ref": "#/$defs/stac_extensions_eo"
},
{
- "$comment": "EO extension expects at 'eo:bands' in (at least) 1 asset, and possibly in Item properties. Items are for summarizing. Since MLM also uses bands by 'name' reference, allow any combination, and let 'eo' validate remaining combinations.",
"anyOf": [
{
- "$comment": "This is the JSON-object 'properties' definition.",
- "properties": {
- "$comment": "This is the STAC-Item 'properties' field.",
- "properties": {
- "$ref": "#/$defs/stac_extensions_eo_bands"
- }
- }
+ "$ref": "#/$defs/stac_extensions_eo_bands_item"
},
{
- "$comment": "For the case where 'eo:bands' is in the Asset of the model, it must also contain the 'mlm:model' role.",
- "properties": {
- "assets": {
- "additionalProperties": {
- "if": {
- "$ref": "#/$defs/AssetModelRole"
- },
- "then": {
- "$ref": "#/$defs/stac_extensions_eo_bands"
- }
- }
- }
- }
+ "$ref": "#/$defs/stac_extensions_eo_bands_asset"
}
]
}
@@ -866,16 +906,14 @@
"$ref": "#/$defs/stac_version_1.1"
},
{
- "$comment": "This is the JSON-object 'properties' definition.",
+ "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties'.",
"properties": {
- "$comment": "This is the STAC-Item 'properties' field.",
"properties": {
"required": [
"bands"
],
- "$comment": "This is the JSON-object 'properties' definition for the STAC Item 'properties' field.",
+ "$comment": "This is the JSON-object 'properties' definition for the STAC Core 'bands' field defined by [https://github.com/radiantearth/stac-spec/blob/bands/item-spec/common-metadata.md#bands].",
"properties": {
- "$comment": "https://github.com/radiantearth/stac-spec/blob/bands/item-spec/common-metadata.md#bands",
"bands": {
"type": "array",
"minItems": 1,
@@ -892,63 +930,7 @@
]
},
"else": {
- "$comment": "Case where no 'bands' are referenced in the MLM input. Counter-validate there are no 'eo:bands' or 'raster:bands' in the Model Asset.",
- "allOf": [
- {
- "$comment": "This is the JSON-object 'properties' definition.",
- "properties": {
- "$comment": "This is the STAC-Item 'properties' field.",
- "properties": {
- "required": [
- "mlm:input"
- ],
- "$comment": "This is the JSON-object 'properties' definition for the STAC Item 'properties' field.",
- "properties": {
- "$comment": "Required MLM bands listing referring to at least one band name.",
- "mlm:input": {
- "type": "array",
- "items": {
- "required": [
- "bands"
- ],
- "$comment": "This is the 'Model Input Object' properties.",
- "properties": {
- "bands": {
- "$comment": "No bands reference provided, therefore none permitted in model inputs.",
- "type": "array",
- "maxItems": 0
- }
- }
- }
- }
- }
- }
- }
- },
- {
- "properties": {
- "assets": {
- "additionalProperties": {
- "if": {
- "$ref": "#/$defs/AssetModelRole"
- },
- "then": {
- "not": {
- "anyOf": [
- {
- "$ref": "#/$defs/stac_extensions_eo_bands"
- },
- {
- "$ref": "#/$defs/stac_extensions_raster_bands"
- }
- ]
- }
- }
- }
- }
- }
- }
- ]
+ "$comment": "Case where no 'bands' (empty list) are referenced in the MLM input. Because models can use a mixture of inputs with/without bands, we cannot enforce eo/raster/stac bands references to be omitted. If bands are provided in the 'mlm:model', it will simply be an odd case if none are used in any 'mlm:input' 'bands'."
}
}
}
diff --git a/stac_model/examples.py b/stac_model/examples.py
index 47be1db..d882623 100644
--- a/stac_model/examples.py
+++ b/stac_model/examples.py
@@ -3,6 +3,7 @@
import pystac
import shapely
from dateutil.parser import parse as parse_dt
+from pystac.extensions.eo import Band, EOExtension
from pystac.extensions.file import FileExtension
from stac_model.base import ProcessingExpression
@@ -134,7 +135,7 @@ def eurosat_resnet() -> ItemMLModelExtension:
href="https://github.com/microsoft/torchgeo/blob/61efd2e2c4df7ebe3bd03002ebbaeaa3cfe9885a/torchgeo/models/resnet.py#L207",
media_type="text/x-python",
roles=[
- "mlm:model",
+ "mlm:source_code",
"code",
],
),
@@ -214,10 +215,24 @@ def eurosat_resnet() -> ItemMLModelExtension:
model_asset = cast(
FileExtension[pystac.Asset],
- pystac.extensions.file.FileExtension.ext(assets["model"], add_if_missing=True),
+ FileExtension.ext(assets["model"], add_if_missing=True),
)
model_asset.apply(size=ml_model_size)
+ eo_model_asset = cast(
+ EOExtension[pystac.Asset],
+ EOExtension.ext(assets["model"], add_if_missing=True),
+ )
+ # NOTE:
+ # typically, it is recommended to add as many details as possible for the band description
+ # minimally, the names (which are well-known for Sentinel-2) are sufficient
+ eo_bands = []
+ for name in band_names:
+ band = Band({})
+ band.apply(name=name)
+ eo_bands.append(band)
+ eo_model_asset.apply(bands=eo_bands)
+
item_mlm = MLModelExtension.ext(item, add_if_missing=True)
item_mlm.apply(ml_model_meta.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True))
return item_mlm
diff --git a/tests/test_schema.py b/tests/test_schema.py
index 5cf04d5..4755978 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -27,6 +27,25 @@ def test_mlm_schema(
assert SCHEMA_URI in validated
+@pytest.mark.parametrize(
+ "mlm_example",
+ ["item_raster_bands.json"],
+ indirect=True,
+)
+def test_mlm_missing_bands_invalid_if_mlm_input_lists_bands(
+ mlm_validator: STACValidator,
+ mlm_example: Dict[str, JSON],
+) -> None:
+ mlm_item = pystac.Item.from_dict(mlm_example)
+ pystac.validation.validate(mlm_item, validator=mlm_validator) # ensure original is valid
+
+ mlm_bands_bad_data = copy.deepcopy(mlm_example)
+ mlm_bands_bad_data["assets"]["weights"].pop("raster:bands") # type: ignore # no 'None' to raise in case modified
+ with pytest.raises(pystac.errors.STACValidationError):
+ mlm_bands_bad_item = pystac.Item.from_dict(mlm_bands_bad_data)
+ pystac.validation.validate(mlm_bands_bad_item, validator=mlm_validator)
+
+
@pytest.mark.parametrize(
"mlm_example",
["item_eo_bands_summarized.json"],
@@ -40,7 +59,7 @@ def test_mlm_eo_bands_invalid_only_in_item_properties(
pystac.validation.validate(mlm_item, validator=mlm_validator) # ensure original is valid
mlm_eo_bands_bad_data = copy.deepcopy(mlm_example)
- mlm_eo_bands_bad_data["assets"]["weights"].pop("eo:bands") # type: ignore
+ mlm_eo_bands_bad_data["assets"]["weights"].pop("eo:bands") # type: ignore # no 'None' to raise in case modified
with pytest.raises(pystac.errors.STACValidationError):
mlm_eo_bands_bad_item = pystac.Item.from_dict(mlm_eo_bands_bad_data)
pystac.validation.validate(mlm_eo_bands_bad_item, validator=mlm_validator)
@@ -61,7 +80,7 @@ def test_mlm_no_input_allowed_but_explicit_empty_array_required(
pystac.validation.validate(mlm_item, validator=mlm_validator)
with pytest.raises(pystac.errors.STACValidationError):
- mlm_data["properties"].pop("mlm:input") # type: ignore
+ mlm_data["properties"].pop("mlm:input") # type: ignore # no 'None' to raise in case modified
mlm_item = pystac.Item.from_dict(mlm_data)
pystac.validation.validate(mlm_item, validator=mlm_validator)