From aa7cdc6a5d0d2c2fa19dbae8d7047921156ff136 Mon Sep 17 00:00:00 2001
From: fynnbe
Date: Wed, 26 Oct 2022 21:59:32 +0200
Subject: [PATCH 01/40] draft workflow RDF

---
 bioimageio/spec/workflow/__init__.py | 1 +
 bioimageio/spec/workflow/v0_2/__init__.py | 9 ++
 bioimageio/spec/workflow/v0_2/converters.py | 3 +
 bioimageio/spec/workflow/v0_2/raw_nodes.py | 49 +++++++
 bioimageio/spec/workflow/v0_2/schema.py | 137 ++++++++++++++++++++
 bioimageio/spec/workflow/v0_2/utils.py | 5 +
 6 files changed, 204 insertions(+)
 create mode 100644 bioimageio/spec/workflow/__init__.py
 create mode 100644 bioimageio/spec/workflow/v0_2/__init__.py
 create mode 100644 bioimageio/spec/workflow/v0_2/converters.py
 create mode 100644 bioimageio/spec/workflow/v0_2/raw_nodes.py
 create mode 100644 bioimageio/spec/workflow/v0_2/schema.py
 create mode 100644 bioimageio/spec/workflow/v0_2/utils.py

diff --git a/bioimageio/spec/workflow/__init__.py b/bioimageio/spec/workflow/__init__.py
new file mode 100644
index 000000000..00dc806ab
--- /dev/null
+++ b/bioimageio/spec/workflow/__init__.py
@@ -0,0 +1 @@
+from . import v0_2
diff --git a/bioimageio/spec/workflow/v0_2/__init__.py b/bioimageio/spec/workflow/v0_2/__init__.py
new file mode 100644
index 000000000..431dc6a3e
--- /dev/null
+++ b/bioimageio/spec/workflow/v0_2/__init__.py
@@ -0,0 +1,9 @@
+from . import converters, raw_nodes, schema, utils
+from .raw_nodes import FormatVersion
+
+try:
+    from typing import get_args
+except ImportError:
+    from typing_extensions import get_args  # type: ignore
+
+format_version = get_args(FormatVersion)[-1]
diff --git a/bioimageio/spec/workflow/v0_2/converters.py b/bioimageio/spec/workflow/v0_2/converters.py
new file mode 100644
index 000000000..833a36197
--- /dev/null
+++ b/bioimageio/spec/workflow/v0_2/converters.py
@@ -0,0 +1,3 @@
+from bioimageio.spec.rdf.v0_2.converters import maybe_convert as maybe_convert_rdf
+
+maybe_convert = maybe_convert_rdf
diff --git a/bioimageio/spec/workflow/v0_2/raw_nodes.py b/bioimageio/spec/workflow/v0_2/raw_nodes.py
new file mode 100644
index 000000000..630549c3e
--- /dev/null
+++ b/bioimageio/spec/workflow/v0_2/raw_nodes.py
@@ -0,0 +1,49 @@
+""" raw nodes for the dataset RDF spec
+
+raw nodes are the deserialized equivalent to the content of any RDF.
+serialization and deserialization are defined in schema: +RDF <--schema--> raw nodes +""" +from dataclasses import dataclass +from pathlib import Path + +from marshmallow import missing +from marshmallow.utils import _Missing + +from bioimageio.spec.rdf.v0_2.raw_nodes import FormatVersion, RDF as _RDF, URI +from bioimageio.spec.shared.raw_nodes import RawNode + +try: + from typing import Any, Dict, List, Literal, Union +except ImportError: + from typing_extensions import Literal # type: ignore + +FormatVersion = FormatVersion + + +@dataclass +class Tensor(RawNode): + name: str = missing + description: Union[_Missing, str] = missing + + +@dataclass +class Step(RawNode): + id: Union[_Missing, str] = missing + op: str = missing + inputs: Union[_Missing, List[str]] = missing + outputs: Union[_Missing, List[str]] = missing + kwargs: Union[_Missing, Dict[str, Any]] = missing + + +@dataclass +class Workflow(_RDF): + type: Literal["workflow"] = missing + + inputs: List[Tensor] = missing + outputs: List[Tensor] = missing + + test_inputs: List[Union[URI, Path]] = missing + test_outputs: List[Union[URI, Path]] = missing + + steps: List[Step] = missing diff --git a/bioimageio/spec/workflow/v0_2/schema.py b/bioimageio/spec/workflow/v0_2/schema.py new file mode 100644 index 000000000..14ea71e24 --- /dev/null +++ b/bioimageio/spec/workflow/v0_2/schema.py @@ -0,0 +1,137 @@ +import typing + +from marshmallow import ValidationError, validates, validates_schema + +from bioimageio.spec.rdf.v0_2.schema import RDF +from bioimageio.spec.shared import field_validators, fields +from bioimageio.spec.shared.schema import SharedBioImageIOSchema +from . import raw_nodes + +try: + from typing import get_args +except ImportError: + from typing_extensions import get_args # type: ignore + + +class _BioImageIOSchema(SharedBioImageIOSchema): + raw_nodes = raw_nodes + + +class Tensor(_BioImageIOSchema): + name = fields.String( + required=True, + validate=field_validators.Predicate("isidentifier"), + bioimageio_description="Tensor name. No duplicates are allowed.", + ) + description = fields.String() + + +class Step(_BioImageIOSchema): + id = fields.String( + required=False, + validate=field_validators.Predicate("isidentifier"), + bioimageio_description="Step id for referencing the steps' kwargs or outputs.", + ) + op = fields.String( + required=True, + validate=field_validators.Predicate("isidentifier"), + bioimageio_description="Name of operation. Must be implemented in bioimageio.core or bioimageio.contrib.", + ) + inputs = fields.List( + fields.String( + validate=field_validators.Predicate("isidentifier"), + bioimageio_description="named output of a previous step with the pattern '.outputs.'", + ), + required=False, + ) + outputs = fields.List( + fields.String( + validate=field_validators.Predicate("isidentifier"), + ), + bioimageio_description="output names for this step", + required=False, + ) + kwargs = fields.Kwargs(bioimageio_description="Key word arguments for op.") + + +class Workflow(_BioImageIOSchema, RDF): + bioimageio_description = f"""# BioImage.IO Workflow Resource Description File {get_args(raw_nodes.FormatVersion)[-1]} +This specification defines the fields used in a BioImage.IO-compliant resource description file (`RDF`) for describing workflows. +These fields are typically stored in a YAML file which we call Workflow Resource Description File or `workflow RDF`. + +The workflow RDF YAML file contains mandatory and optional fields. In the following description, optional fields are indicated by _optional_. 
+_optional*_ with an asterisk indicates the field is optional depending on the value in another field. +""" + inputs = fields.List( + fields.Nested(Tensor()), + validate=field_validators.Length(min=1), + required=True, + bioimageio_description="Describes the input tensors expected by this model.", + ) + + @validates("inputs") + def no_duplicate_input_tensor_names(self, value: typing.List[raw_nodes.Tensor]): + if not isinstance(value, list) or not all(isinstance(v, raw_nodes.Tensor) for v in value): + raise ValidationError("Could not check for duplicate input tensor names due to another validation error.") + + names = [t.name for t in value] + if len(names) > len(set(names)): + raise ValidationError("Duplicate input tensor names are not allowed.") + + outputs = fields.List( + fields.Nested(Tensor()), + validate=field_validators.Length(min=1), + bioimageio_description="Describes the output tensors from this model.", + ) + + @validates("outputs") + def no_duplicate_output_tensor_names(self, value: typing.List[raw_nodes.Tensor]): + if not isinstance(value, list) or not all(isinstance(v, raw_nodes.Tensor) for v in value): + raise ValidationError("Could not check for duplicate output tensor names due to another validation error.") + + names = [t["name"] if isinstance(t, dict) else t.name for t in value] + if len(names) > len(set(names)): + raise ValidationError("Duplicate output tensor names are not allowed.") + + @validates_schema + def inputs_and_outputs(self, data, **kwargs): + ipts: typing.List[raw_nodes.Tensor] = data.get("inputs") + outs: typing.List[raw_nodes.Tensor] = data.get("outputs") + if any( + [ + not isinstance(ipts, list), + not isinstance(outs, list), + not all(isinstance(v, raw_nodes.Tensor) for v in ipts), + not all(isinstance(v, raw_nodes.Tensor) for v in outs), + ] + ): + raise ValidationError("Could not check for duplicate tensor names due to another validation error.") + + # no duplicate tensor names + names = [t.name for t in ipts + outs] # type: ignore + if len(names) > len(set(names)): + raise ValidationError("Duplicate tensor names are not allowed.") + + test_inputs = fields.List( + fields.Union([fields.URI(), fields.Path()]), + validate=field_validators.Length(min=1), + required=True, + bioimageio_description="List of URIs or local relative paths to test inputs as described in inputs for " + "**a single test case**. " + "This means if your workflow has more than one input, you should provide one URI for each input." + "Each test input should be a file with a ndarray in " + "[numpy.lib file format](https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#module-numpy.lib.format)." + "The extension must be '.npy'.", + ) + test_outputs = fields.List( + fields.Union([fields.URI(), fields.Path()]), + validate=field_validators.Length(min=1), + required=True, + bioimageio_description="Analog to test_inputs.", + ) + steps = fields.List( + fields.Nested(Step()), + validate=field_validators.Length(min=1), + required=True, + bioimageio_description="Workflow steps to be executed consecutively.", + ) diff --git a/bioimageio/spec/workflow/v0_2/utils.py b/bioimageio/spec/workflow/v0_2/utils.py new file mode 100644 index 000000000..f049849c2 --- /dev/null +++ b/bioimageio/spec/workflow/v0_2/utils.py @@ -0,0 +1,5 @@ +from . 
import raw_nodes
+
+
+def filter_resource_description(raw_rd: raw_nodes.Workflow) -> raw_nodes.Workflow:
+    return raw_rd

From 7924b2fa19f82cc9d485ff850e1726255c857eca Mon Sep 17 00:00:00 2001
From: fynnbe
Date: Wed, 26 Oct 2022 22:02:49 +0200
Subject: [PATCH 02/40] update passthrough module generation

---
 bioimageio/spec/workflow/__init__.py | 13 +++++++++++++
 bioimageio/spec/workflow/converters.py | 3 +++
 bioimageio/spec/workflow/raw_nodes.py | 3 +++
 bioimageio/spec/workflow/schema.py | 3 +++
 bioimageio/spec/workflow/utils.py | 3 +++
 scripts/generate_passthrough_modules.py | 2 +-
 6 files changed, 26 insertions(+), 1 deletion(-)
 create mode 100644 bioimageio/spec/workflow/converters.py
 create mode 100644 bioimageio/spec/workflow/raw_nodes.py
 create mode 100644 bioimageio/spec/workflow/schema.py
 create mode 100644 bioimageio/spec/workflow/utils.py

diff --git a/bioimageio/spec/workflow/__init__.py b/bioimageio/spec/workflow/__init__.py
index 00dc806ab..d8a7cc745 100644
--- a/bioimageio/spec/workflow/__init__.py
+++ b/bioimageio/spec/workflow/__init__.py
@@ -1 +1,14 @@
 from . import v0_2
+
+# autogen: start
+from . import converters, raw_nodes, schema, utils
+from .raw_nodes import FormatVersion
+
+try:
+    from typing import get_args
+except ImportError:
+    from typing_extensions import get_args  # type: ignore
+
+format_version = get_args(FormatVersion)[-1]
+
+# autogen: stop
diff --git a/bioimageio/spec/workflow/converters.py b/bioimageio/spec/workflow/converters.py
new file mode 100644
index 000000000..b296f1351
--- /dev/null
+++ b/bioimageio/spec/workflow/converters.py
@@ -0,0 +1,3 @@
+# Auto-generated by generate_passthrough_modules.py - do not modify
+
+from .v0_2.converters import *
diff --git a/bioimageio/spec/workflow/raw_nodes.py b/bioimageio/spec/workflow/raw_nodes.py
new file mode 100644
index 000000000..bb25c3c1f
--- /dev/null
+++ b/bioimageio/spec/workflow/raw_nodes.py
@@ -0,0 +1,3 @@
+# Auto-generated by generate_passthrough_modules.py - do not modify
+
+from .v0_2.raw_nodes import *
diff --git a/bioimageio/spec/workflow/schema.py b/bioimageio/spec/workflow/schema.py
new file mode 100644
index 000000000..9a6b0a4b9
--- /dev/null
+++ b/bioimageio/spec/workflow/schema.py
@@ -0,0 +1,3 @@
+# Auto-generated by generate_passthrough_modules.py - do not modify
+
+from .v0_2.schema import *
diff --git a/bioimageio/spec/workflow/utils.py b/bioimageio/spec/workflow/utils.py
new file mode 100644
index 000000000..1086c2eb1
--- /dev/null
+++ b/bioimageio/spec/workflow/utils.py
@@ -0,0 +1,3 @@
+# Auto-generated by generate_passthrough_modules.py - do not modify
+
+from .v0_2.utils import *
diff --git a/scripts/generate_passthrough_modules.py b/scripts/generate_passthrough_modules.py
index f03360cc8..9a57bc1ee 100644
--- a/scripts/generate_passthrough_modules.py
+++ b/scripts/generate_passthrough_modules.py
@@ -114,7 +114,7 @@ def parse_args():
         )
     )
     p.add_argument("command", choices=["check", "generate"])
-    target_choices = ["rdf", "collection", "model", "dataset"]
+    target_choices = ["rdf", "collection", "model", "dataset", "workflow"]
     p.add_argument(
         "--target-rdf",
         choices=target_choices,

From 825cad33ef2cf21e4c899b0cd1032d2824a82a6b Mon Sep 17 00:00:00 2001
From: fynnbe
Date: Wed, 26 Oct 2022 23:11:23 +0200
Subject: [PATCH 03/40] Tensor -> Arg; ArgType

---
 bioimageio/spec/workflow/v0_2/raw_nodes.py | 8 ++--
 bioimageio/spec/workflow/v0_2/schema.py | 49 ++++++++++++----------
 2 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/bioimageio/spec/workflow/v0_2/raw_nodes.py
b/bioimageio/spec/workflow/v0_2/raw_nodes.py index 630549c3e..4f39245a8 100644 --- a/bioimageio/spec/workflow/v0_2/raw_nodes.py +++ b/bioimageio/spec/workflow/v0_2/raw_nodes.py @@ -19,11 +19,13 @@ from typing_extensions import Literal # type: ignore FormatVersion = FormatVersion +ArgType = Literal["tensor", "string", "object"] @dataclass -class Tensor(RawNode): +class Arg(RawNode): name: str = missing + type: ArgType = missing description: Union[_Missing, str] = missing @@ -40,8 +42,8 @@ class Step(RawNode): class Workflow(_RDF): type: Literal["workflow"] = missing - inputs: List[Tensor] = missing - outputs: List[Tensor] = missing + inputs: List[Arg] = missing + outputs: List[Arg] = missing test_inputs: List[Union[URI, Path]] = missing test_outputs: List[Union[URI, Path]] = missing diff --git a/bioimageio/spec/workflow/v0_2/schema.py b/bioimageio/spec/workflow/v0_2/schema.py index 14ea71e24..7f1d77c91 100644 --- a/bioimageio/spec/workflow/v0_2/schema.py +++ b/bioimageio/spec/workflow/v0_2/schema.py @@ -17,13 +17,18 @@ class _BioImageIOSchema(SharedBioImageIOSchema): raw_nodes = raw_nodes -class Tensor(_BioImageIOSchema): +class Arg(_BioImageIOSchema): name = fields.String( required=True, validate=field_validators.Predicate("isidentifier"), - bioimageio_description="Tensor name. No duplicates are allowed.", + bioimageio_description="Argument/tensor name. No duplicates are allowed.", ) - description = fields.String() + type = fields.String( + required=True, + validate=field_validators.OneOf(get_args(raw_nodes.ArgType)), + bioimageio_description=f"Argument type. One of: {get_args(raw_nodes.ArgType)}", + ) + description = fields.String(bioimageio_description="Description of argument/tensor.") class Step(_BioImageIOSchema): @@ -63,54 +68,54 @@ class Workflow(_BioImageIOSchema, RDF): _optional*_ with an asterisk indicates the field is optional depending on the value in another field. 
""" inputs = fields.List( - fields.Nested(Tensor()), + fields.Nested(Arg()), validate=field_validators.Length(min=1), required=True, - bioimageio_description="Describes the input tensors expected by this model.", + bioimageio_description="Describes the inputs expected by this model.", ) @validates("inputs") - def no_duplicate_input_tensor_names(self, value: typing.List[raw_nodes.Tensor]): - if not isinstance(value, list) or not all(isinstance(v, raw_nodes.Tensor) for v in value): - raise ValidationError("Could not check for duplicate input tensor names due to another validation error.") + def no_duplicate_input_names(self, value: typing.List[raw_nodes.Arg]): + if not isinstance(value, list) or not all(isinstance(v, raw_nodes.Arg) for v in value): + raise ValidationError("Could not check for duplicate input names due to another validation error.") names = [t.name for t in value] if len(names) > len(set(names)): - raise ValidationError("Duplicate input tensor names are not allowed.") + raise ValidationError("Duplicate input names are not allowed.") outputs = fields.List( - fields.Nested(Tensor()), + fields.Nested(Arg()), validate=field_validators.Length(min=1), - bioimageio_description="Describes the output tensors from this model.", + bioimageio_description="Describes the outputs from this model.", ) @validates("outputs") - def no_duplicate_output_tensor_names(self, value: typing.List[raw_nodes.Tensor]): - if not isinstance(value, list) or not all(isinstance(v, raw_nodes.Tensor) for v in value): - raise ValidationError("Could not check for duplicate output tensor names due to another validation error.") + def no_duplicate_output_names(self, value: typing.List[raw_nodes.Arg]): + if not isinstance(value, list) or not all(isinstance(v, raw_nodes.Arg) for v in value): + raise ValidationError("Could not check for duplicate output names due to another validation error.") names = [t["name"] if isinstance(t, dict) else t.name for t in value] if len(names) > len(set(names)): - raise ValidationError("Duplicate output tensor names are not allowed.") + raise ValidationError("Duplicate output names are not allowed.") @validates_schema def inputs_and_outputs(self, data, **kwargs): - ipts: typing.List[raw_nodes.Tensor] = data.get("inputs") - outs: typing.List[raw_nodes.Tensor] = data.get("outputs") + ipts: typing.List[raw_nodes.Arg] = data.get("inputs") + outs: typing.List[raw_nodes.Arg] = data.get("outputs") if any( [ not isinstance(ipts, list), not isinstance(outs, list), - not all(isinstance(v, raw_nodes.Tensor) for v in ipts), - not all(isinstance(v, raw_nodes.Tensor) for v in outs), + not all(isinstance(v, raw_nodes.Arg) for v in ipts), + not all(isinstance(v, raw_nodes.Arg) for v in outs), ] ): - raise ValidationError("Could not check for duplicate tensor names due to another validation error.") + raise ValidationError("Could not check for duplicate names due to another validation error.") - # no duplicate tensor names + # no duplicate names names = [t.name for t in ipts + outs] # type: ignore if len(names) > len(set(names)): - raise ValidationError("Duplicate tensor names are not allowed.") + raise ValidationError("Duplicate names are not allowed.") test_inputs = fields.List( fields.Union([fields.URI(), fields.Path()]), From 891093f27b9181ea9b2d1685691a5c7bf2ff941a Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 26 Oct 2022 23:35:35 +0200 Subject: [PATCH 04/40] test examples --- bioimageio/spec/workflow/v0_2/schema.py | 2 - .../hpa/single_cell_classification.yaml | 44 +++++++++++++++++++ 
.../workflows/stardist/stardist_example.yaml | 39 ++++++++++++++++
 tests/conftest.py | 10 +++++
 4 files changed, 93 insertions(+), 2 deletions(-)
 create mode 100644 example_specs/workflows/hpa/single_cell_classification.yaml
 create mode 100644 example_specs/workflows/stardist/stardist_example.yaml

diff --git a/bioimageio/spec/workflow/v0_2/schema.py b/bioimageio/spec/workflow/v0_2/schema.py
index 7f1d77c91..305fc9b33 100644
--- a/bioimageio/spec/workflow/v0_2/schema.py
+++ b/bioimageio/spec/workflow/v0_2/schema.py
@@ -20,7 +20,6 @@ class _BioImageIOSchema(SharedBioImageIOSchema):
 class Arg(_BioImageIOSchema):
     name = fields.String(
         required=True,
-        validate=field_validators.Predicate("isidentifier"),
         bioimageio_description="Argument/tensor name. No duplicates are allowed.",
     )
     type = fields.String(
@@ -44,7 +43,6 @@ class Step(_BioImageIOSchema):
     )
     inputs = fields.List(
         fields.String(
-            validate=field_validators.Predicate("isidentifier"),
             bioimageio_description="named output of a previous step with the pattern '.outputs.'",
         ),
         required=False,
diff --git a/example_specs/workflows/hpa/single_cell_classification.yaml b/example_specs/workflows/hpa/single_cell_classification.yaml
new file mode 100644
index 000000000..cd565b330
--- /dev/null
+++ b/example_specs/workflows/hpa/single_cell_classification.yaml
@@ -0,0 +1,44 @@
+name: HPA Single-cell Classification Example Workflow
+description: A workflow for running HPA single-cell classification
+format_version: 0.2.3
+type: workflow
+
+inputs:
+- name: nuclei
+  type: tensor
+- name: protein
+  type: tensor
+
+test_inputs:
+- nuclei.npy
+- protein.npy
+
+outputs:
+- name: cells
+  type: tensor
+- name: scores
+  type: tensor
+
+test_outputs:
+- cells.npy
+- scores.npy
+
+steps:
+- id: segmentation
+  op: model_inference
+  inputs: [inputs.nuclei]  # take the first output of step 1 (id: data) as the only input
+  outputs: [cells]
+  kwargs:
+    model_id: conscientious-seashell
+    preprocessing: true
+    postprocessing: false
+- id: classification
+  op: model_inference
+  inputs: [inputs.protein, segmentation.outputs.cells]  # take the second output of step1 and the output of step 2
+  outputs: [scores]
+  kwargs:
+    model_id: straightforward-crocodile
+    preprocessing: true
+    postprocessing: false
+- op: select_outputs
+  inputs: [segmentation.outputs.cells, classification.outputs.scores]
diff --git a/example_specs/workflows/stardist/stardist_example.yaml b/example_specs/workflows/stardist/stardist_example.yaml
new file mode 100644
index 000000000..6bb2a9970
--- /dev/null
+++ b/example_specs/workflows/stardist/stardist_example.yaml
@@ -0,0 +1,39 @@
+name: StarDist Example Workflow
+description: A workflow for running stardist
+format_version: 0.2.3
+type: workflow
+
+inputs:
+- name: raw
+  type: tensor
+  description: image with star-convex objects
+
+test_inputs:
+- raw.npy
+
+outputs:
+- name: labels
+  type: tensor
+- name: coord
+  type: tensor
+- name: points
+  type: tensor
+- name: prob
+  type: tensor
+
+test_outputs:
+- labels.npy
+- coord.npy
+- points.npy
+- prob.npy
+
+steps:
+- op: zero_mean_unit_variance
+- op: model_inference
+  kwargs:
+    model_id: fearless-crab
+    preprocessing: false  # disable the preprocessing
+    postprocessing: false  # disable the postprocessing
+- op: stardist_postprocessing
+  kwargs:
+    diameter: 2.3
diff --git a/tests/conftest.py b/tests/conftest.py
index 275a9f7cc..62f2b74e8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -101,3 +101,13 @@ def unet2d_keras_tf():
 @pytest.fixture
 def dataset_rdf():
     return
pathlib.Path(__file__).parent / "../example_specs/datasets/covid_if_training_data/rdf.yaml" + + +@pytest.fixture +def stardist_workflow_rdf(): + return pathlib.Path(__file__).parent / "../example_specs/workflows/stardist/stardist_example.yaml" + + +@pytest.fixture +def hpa_workflow_rdf(): + return pathlib.Path(__file__).parent / "../example_specs/workflows/hpa/single_cell_classification.yaml" From 8f06f9eca49b4e368c3658347560f1c6e2848395 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Oct 2022 00:25:04 +0200 Subject: [PATCH 05/40] add schema validation --- bioimageio/spec/workflow/v0_2/schema.py | 39 ++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/bioimageio/spec/workflow/v0_2/schema.py b/bioimageio/spec/workflow/v0_2/schema.py index 305fc9b33..51738eca3 100644 --- a/bioimageio/spec/workflow/v0_2/schema.py +++ b/bioimageio/spec/workflow/v0_2/schema.py @@ -1,6 +1,6 @@ import typing -from marshmallow import ValidationError, validates, validates_schema +from marshmallow import ValidationError, missing, validates, validates_schema from bioimageio.spec.rdf.v0_2.schema import RDF from bioimageio.spec.shared import field_validators, fields @@ -126,15 +126,52 @@ def inputs_and_outputs(self, data, **kwargs): "[numpy.lib file format](https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#module-numpy.lib.format)." "The extension must be '.npy'.", ) + test_outputs = fields.List( fields.Union([fields.URI(), fields.Path()]), validate=field_validators.Length(min=1), required=True, bioimageio_description="Analog to test_inputs.", ) + + @validates_schema + def test_outputs_match(self, data, **kwargs): + steps = data.get("steps") + if not steps or not isinstance(steps, list) or not isinstance(steps[-1], raw_nodes.Step): + raise ValidationError("invalid 'steps'") + + test_outputs = data.get("test_outputs") + if not isinstance(test_outputs, list): + raise ValidationError("invalid 'test_outputs'") + + if steps[-1].op == "select_outputs": + if steps[-1].outputs: + raise ValidationError("Unexpected 'outputs' defined for op: 'select_outputs'. 
Did you mean 'inputs'?") + if len(test_outputs) != len(steps[-1].inputs): + raise ValidationError(f"Expected {len(steps[-1].inputs)} 'test_inputs', but found {len(test_outputs)}") + steps = fields.List( fields.Nested(Step()), validate=field_validators.Length(min=1), required=True, bioimageio_description="Workflow steps to be executed consecutively.", ) + + @validates_schema + def step_input_references_exist(self, data, **kwargs): + inputs = data.get("inputs") + if not inputs or not isinstance(inputs, list) or not all(isinstance(ipt, raw_nodes.Arg) for ipt in inputs): + raise ValidationError("Missing/invalid 'inputs'") + steps = data.get("steps") + if not steps or not isinstance(steps, list) or not isinstance(steps[0], raw_nodes.Step): + raise ValidationError("Missing/invalid 'steps'") + + references = {f"inputs.{ipt.name}" for ipt in inputs} + for step in steps: + if step.inputs: + for si in step.inputs: + if si not in references: + raise ValidationError(f"Invalid step input reference '{si}'") + + if step.outputs: + references.update({f"{step.id}.outputs.{out}" for out in step.outputs}) From 6889bf81996019a1d6f068dfd2d189f44f245b78 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Oct 2022 00:25:39 +0200 Subject: [PATCH 06/40] add test_workflow_rdf.py --- tests/test_workflow_rdf.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 tests/test_workflow_rdf.py diff --git a/tests/test_workflow_rdf.py b/tests/test_workflow_rdf.py new file mode 100644 index 000000000..dc06265b8 --- /dev/null +++ b/tests/test_workflow_rdf.py @@ -0,0 +1,24 @@ +from bioimageio.spec.shared import yaml +from bioimageio.spec.workflow import raw_nodes + + +def test_workflow_rdf_stardist_example(stardist_workflow_rdf): + from bioimageio.spec.workflow.schema import Workflow + + data = yaml.load(stardist_workflow_rdf) + # data["root_path"] = stardist_workflow_rdf.parent + + workflow = Workflow().load(data) + assert isinstance(workflow, raw_nodes.Workflow) + assert workflow.steps[0].op == "zero_mean_unit_variance" + + +def test_workflow_rdf_hpa_example(hpa_workflow_rdf): + from bioimageio.spec.workflow.schema import Workflow + + data = yaml.load(hpa_workflow_rdf) + # data["root_path"] = hpa_workflow_rdf.parent + + workflow = Workflow().load(data) + assert isinstance(workflow, raw_nodes.Workflow) + assert workflow.outputs[0].name == "cells" From 9f0eed210cbb2263815bbbc05717d6763a9d8a28 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Oct 2022 00:29:03 +0200 Subject: [PATCH 07/40] fix missing workflow import --- bioimageio/spec/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/spec/__init__.py b/bioimageio/spec/__init__.py index 1387a5d6b..339a927f2 100644 --- a/bioimageio/spec/__init__.py +++ b/bioimageio/spec/__init__.py @@ -1,4 +1,4 @@ -from . import collection, model, rdf, shared +from . 
import collection, model, rdf, shared, workflow from .commands import update_format, update_rdf, validate from .io_ import ( get_resource_package_content, From 8639d8d1b5050d83386eaa2caee18dad58049437 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Oct 2022 00:29:42 +0200 Subject: [PATCH 08/40] update generate_rdf_docs.py and generate_json_specs.py --- scripts/generate_json_specs.py | 3 +++ scripts/generate_rdf_docs.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/scripts/generate_json_specs.py b/scripts/generate_json_specs.py index d0414afc6..3211da64b 100644 --- a/scripts/generate_json_specs.py +++ b/scripts/generate_json_specs.py @@ -46,7 +46,10 @@ def export_json_schema_from_schema(folder: Path, spec): export_json_schema_from_schema(dist, bioimageio.spec.rdf.v0_2) export_json_schema_from_schema(dist, bioimageio.spec.collection) export_json_schema_from_schema(dist, bioimageio.spec.collection.v0_2) + export_json_schema_from_schema(dist, bioimageio.spec.dataset) export_json_schema_from_schema(dist, bioimageio.spec.dataset.v0_2) export_json_schema_from_schema(dist, bioimageio.spec.model) export_json_schema_from_schema(dist, bioimageio.spec.model.v0_3) export_json_schema_from_schema(dist, bioimageio.spec.model.v0_4) + export_json_schema_from_schema(dist, bioimageio.spec.workflow) + export_json_schema_from_schema(dist, bioimageio.spec.workflow.v0_2) diff --git a/scripts/generate_rdf_docs.py b/scripts/generate_rdf_docs.py index d9c5e6525..0a726b066 100644 --- a/scripts/generate_rdf_docs.py +++ b/scripts/generate_rdf_docs.py @@ -190,3 +190,5 @@ def export_markdown_doc(folder: Path, spec) -> None: export_markdown_doc(dist, bioimageio.spec.model.v0_4) export_markdown_doc(dist, bioimageio.spec.rdf) export_markdown_doc(dist, bioimageio.spec.rdf.v0_2) + export_markdown_doc(dist, bioimageio.spec.workflow) + export_markdown_doc(dist, bioimageio.spec.workflow.v0_2) From 147667054d40c7344c35e5b2c2daa1ef0852e400 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Oct 2022 00:31:23 +0200 Subject: [PATCH 09/40] fix typing import --- bioimageio/spec/workflow/v0_2/raw_nodes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bioimageio/spec/workflow/v0_2/raw_nodes.py b/bioimageio/spec/workflow/v0_2/raw_nodes.py index 4f39245a8..71bac0ca4 100644 --- a/bioimageio/spec/workflow/v0_2/raw_nodes.py +++ b/bioimageio/spec/workflow/v0_2/raw_nodes.py @@ -6,6 +6,7 @@ """ from dataclasses import dataclass from pathlib import Path +from typing import Any, Dict, List, Union from marshmallow import missing from marshmallow.utils import _Missing @@ -14,7 +15,7 @@ from bioimageio.spec.shared.raw_nodes import RawNode try: - from typing import Any, Dict, List, Literal, Union + from typing import Literal except ImportError: from typing_extensions import Literal # type: ignore From fe9243e2694acdf34ccaa8bd35248f17e68fa811 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Oct 2022 15:57:08 +0200 Subject: [PATCH 10/40] test_steps and better workflow kwargs --- bioimageio/spec/shared/fields.py | 4 + bioimageio/spec/workflow/v0_2/raw_nodes.py | 24 +++- bioimageio/spec/workflow/v0_2/schema.py | 121 +++++++++++++----- .../hpa/single_cell_classification.yaml | 35 +++-- .../workflows/stardist/stardist_example.yaml | 51 ++++++-- 5 files changed, 175 insertions(+), 60 deletions(-) diff --git a/bioimageio/spec/shared/fields.py b/bioimageio/spec/shared/fields.py index 931da6a4d..3ef1e5a02 100644 --- a/bioimageio/spec/shared/fields.py +++ b/bioimageio/spec/shared/fields.py @@ -82,6 +82,10 @@ def 
deserialize(self, value: typing.Any, attr: str = None, data: typing.Mapping[ return value +class Boolean(DocumentedField, marshmallow_fields.Boolean): + pass + + class DateTime(DocumentedField, marshmallow_fields.DateTime): """ Parses datetime in ISO8601 or if value already has datetime.datetime type diff --git a/bioimageio/spec/workflow/v0_2/raw_nodes.py b/bioimageio/spec/workflow/v0_2/raw_nodes.py index 71bac0ca4..63eeff3d8 100644 --- a/bioimageio/spec/workflow/v0_2/raw_nodes.py +++ b/bioimageio/spec/workflow/v0_2/raw_nodes.py @@ -4,6 +4,7 @@ serialization and deserialization are defined in schema: RDF <--schema--> raw nodes """ +import typing from dataclasses import dataclass from pathlib import Path from typing import Any, Dict, List, Union @@ -15,18 +16,29 @@ from bioimageio.spec.shared.raw_nodes import RawNode try: - from typing import Literal + from typing import Literal, get_args except ImportError: - from typing_extensions import Literal # type: ignore + from typing_extensions import Literal, get_args # type: ignore FormatVersion = FormatVersion -ArgType = Literal["tensor", "string", "object"] +ArgType = Literal["tensor", "int", "float", "string", "boolean", "list", "dict", "any"] +DefaultType = Union[int, float, str, bool, list, dict, None] +TYPE_NAME_MAP = {int: "int", float: "float", str: "string", bool: "boolean", list: "list", dict: "dict", None: "null"} @dataclass class Arg(RawNode): name: str = missing type: ArgType = missing + default: Union[_Missing, DefaultType] = missing + description: Union[_Missing, str] = missing + + +@dataclass +class WorkflowKwarg(RawNode): + name: str = missing + type: ArgType = missing + default: DefaultType = missing description: Union[_Missing, str] = missing @@ -46,7 +58,7 @@ class Workflow(_RDF): inputs: List[Arg] = missing outputs: List[Arg] = missing - test_inputs: List[Union[URI, Path]] = missing - test_outputs: List[Union[URI, Path]] = missing - steps: List[Step] = missing + test_steps: List[Step] = missing + + kwargs: Union[_Missing, List[WorkflowKwarg]] = missing diff --git a/bioimageio/spec/workflow/v0_2/schema.py b/bioimageio/spec/workflow/v0_2/schema.py index 51738eca3..96f4a5acf 100644 --- a/bioimageio/spec/workflow/v0_2/schema.py +++ b/bioimageio/spec/workflow/v0_2/schema.py @@ -27,9 +27,66 @@ class Arg(_BioImageIOSchema): validate=field_validators.OneOf(get_args(raw_nodes.ArgType)), bioimageio_description=f"Argument type. One of: {get_args(raw_nodes.ArgType)}", ) + default = fields.Raw( + required=False, + bioimageio_description="Default value compatible with type given by `type` field.", + allow_none=True, + ) + + @validates_schema + def default_has_compatible_type(self, data, **kwargs): + if data.get("default") is None: + return + + arg_type_name = data.get("type") + if arg_type_name == "any": + return + + default_type = type(data["default"]) + type_name = raw_nodes.TYPE_NAME_MAP[default_type] + if type_name != arg_type_name: + raise ValidationError( + f"Default value of type {default_type} (type name: {type_name}) does not match type: {arg_type_name}" + ) + description = fields.String(bioimageio_description="Description of argument/tensor.") +class WorkflowKwarg(_BioImageIOSchema): + name = fields.String( + required=True, + bioimageio_description="Key word argument name. No duplicates are allowed.", + ) + type = fields.String( + required=True, + validate=field_validators.OneOf(get_args(raw_nodes.ArgType)), + bioimageio_description=f"Argument type. 
One of: {get_args(raw_nodes.ArgType)}", + ) + default = fields.Raw( + required=True, + bioimageio_description="Default value compatible with type given by `type` field.", + allow_none=True, + ) + + @validates_schema + def default_has_compatible_type(self, data, **kwargs): + if data.get("default") is None: + return + + arg_type_name = data.get("type") + if arg_type_name == "any": + return + + default_type = type(data["default"]) + type_name = raw_nodes.TYPE_NAME_MAP[default_type] + if type_name != arg_type_name: + raise ValidationError( + f"Default value of type {default_type} (type name: {type_name}) does not match type: {arg_type_name}" + ) + + description = fields.String(required=False, bioimageio_description="Description of key word argument.") + + class Step(_BioImageIOSchema): id = fields.String( required=False, @@ -69,7 +126,7 @@ class Workflow(_BioImageIOSchema, RDF): fields.Nested(Arg()), validate=field_validators.Length(min=1), required=True, - bioimageio_description="Describes the inputs expected by this model.", + bioimageio_description="Describes the inputs expected by this workflow.", ) @validates("inputs") @@ -84,7 +141,7 @@ def no_duplicate_input_names(self, value: typing.List[raw_nodes.Arg]): outputs = fields.List( fields.Nested(Arg()), validate=field_validators.Length(min=1), - bioimageio_description="Describes the outputs from this model.", + bioimageio_description="Describes the outputs from this workflow.", ) @validates("outputs") @@ -115,41 +172,12 @@ def inputs_and_outputs(self, data, **kwargs): if len(names) > len(set(names)): raise ValidationError("Duplicate names are not allowed.") - test_inputs = fields.List( - fields.Union([fields.URI(), fields.Path()]), - validate=field_validators.Length(min=1), - required=True, - bioimageio_description="List of URIs or local relative paths to test inputs as described in inputs for " - "**a single test case**. " - "This means if your workflow has more than one input, you should provide one URI for each input." - "Each test input should be a file with a ndarray in " - "[numpy.lib file format](https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#module-numpy.lib.format)." - "The extension must be '.npy'.", - ) - - test_outputs = fields.List( - fields.Union([fields.URI(), fields.Path()]), - validate=field_validators.Length(min=1), - required=True, - bioimageio_description="Analog to test_inputs.", + kwargs = fields.List( + fields.Nested(WorkflowKwarg()), + required=False, + bioimageio_description="Key word arguments for this workflow.", ) - @validates_schema - def test_outputs_match(self, data, **kwargs): - steps = data.get("steps") - if not steps or not isinstance(steps, list) or not isinstance(steps[-1], raw_nodes.Step): - raise ValidationError("invalid 'steps'") - - test_outputs = data.get("test_outputs") - if not isinstance(test_outputs, list): - raise ValidationError("invalid 'test_outputs'") - - if steps[-1].op == "select_outputs": - if steps[-1].outputs: - raise ValidationError("Unexpected 'outputs' defined for op: 'select_outputs'. 
Did you mean 'inputs'?") - if len(test_outputs) != len(steps[-1].inputs): - raise ValidationError(f"Expected {len(steps[-1].inputs)} 'test_inputs', but found {len(test_outputs)}") - steps = fields.List( fields.Nested(Step()), validate=field_validators.Length(min=1), @@ -175,3 +203,26 @@ def step_input_references_exist(self, data, **kwargs): if step.outputs: references.update({f"{step.id}.outputs.{out}" for out in step.outputs}) + + test_steps = fields.List( + fields.Nested(Step()), + validate=field_validators.Length(min=1), + required=True, + bioimageio_description="Test steps to be executed consecutively.", + ) + + @validates_schema + def test_step_input_references_exist(self, data, **kwargs): + steps = data.get("test_steps") + if not steps or not isinstance(steps, list) or not isinstance(steps[0], raw_nodes.Step): + raise ValidationError("Missing/invalid 'test_steps'") + + references = set() + for step in steps: + if step.inputs: + for si in step.inputs: + if si not in references: + raise ValidationError(f"Invalid test step input reference '{si}'") + + if step.outputs: + references.update({f"{step.id}.outputs.{out}" for out in step.outputs}) diff --git a/example_specs/workflows/hpa/single_cell_classification.yaml b/example_specs/workflows/hpa/single_cell_classification.yaml index cd565b330..56398b642 100644 --- a/example_specs/workflows/hpa/single_cell_classification.yaml +++ b/example_specs/workflows/hpa/single_cell_classification.yaml @@ -9,9 +9,11 @@ inputs: - name: protein type: tensor -test_inputs: -- nuclei.npy -- protein.npy +kwargs: +- name: seg_prep + type: boolean + default: false + outputs: - name: cells @@ -19,26 +21,41 @@ outputs: - name: scores type: tensor -test_outputs: -- cells.npy -- scores.npy steps: +- op: set_ - id: segmentation op: model_inference inputs: [inputs.nuclei] # take the first output of step 1 (id: data) as the only input outputs: [cells] kwargs: - model_id: conscientious-seashell - preprocessing: true + rdf_source: conscientious-seashell + preprocessing: ${{ kwargs.seg_prep }} postprocessing: false - id: classification op: model_inference inputs: [inputs.protein, segmentation.outputs.cells] # take the second output of step1 and the output of step 2 outputs: [scores] kwargs: - model_id: straightforward-crocodile + rdf_source: straightforward-crocodile preprocessing: true postprocessing: false - op: select_outputs inputs: [segmentation.outputs.cells, classification.outputs.scores] + +test_steps: +- id: test_tensors + op: load_tensors + outputs: [nuclei, protein, cells, scores] + kwargs: + sources: [nuclei.npy, protein.npy, cells.npy, scores.npy] +- id: workflow + op: run_workflow + inputs: [test_tensors.outputs.nuclei, test_tensors.outputs.protein] + outputs: [cells, scores] + kwargs: + rdf_source: ${{ self.rdf_source }} +- op: assert_close + inputs: [test_tensors.outputs.cells, workflow.outputs.cells] +- op: assert_close + inputs: [test_tensors.outputs.scores, workflow.outputs.scores] diff --git a/example_specs/workflows/stardist/stardist_example.yaml b/example_specs/workflows/stardist/stardist_example.yaml index 6bb2a9970..9adef7fb5 100644 --- a/example_specs/workflows/stardist/stardist_example.yaml +++ b/example_specs/workflows/stardist/stardist_example.yaml @@ -8,9 +8,6 @@ inputs: type: tensor description: image with star-convex objects -test_inputs: -- raw.npy - outputs: - name: labels type: tensor @@ -21,19 +18,53 @@ outputs: - name: prob type: tensor -test_outputs: -- labels.npy -- coord.npy -- points.npy -- prob.npy +kwargs: +- name: diameter + 
type: float + default: 2.3 steps: - op: zero_mean_unit_variance - op: model_inference kwargs: - model_id: fearless-crab + rdf_source: fearless-crab preprocessing: false # disable the preprocessing postprocessing: false # disable the postprocessing - op: stardist_postprocessing kwargs: - diameter: 2.3 + diameter: ${{ kwargs.diameter }} + +test_steps: +- id: test_tensors + op: load_tensors + outputs: + - raw + - labels + - coord + - points + - prob + kwargs: + sources: + - raw.npy + - labels.npy + - coord.npy + - points.npy + - prob.npy +- id: workflow + op: run_workflow + inputs: [test_tensors.outputs.raw] + outputs: + - labels + - coord + - points + - prob + kwargs: + rdf_source: ${{ self.rdf_source }} +- op: assert_close + inputs: [test_tensors.outputs.labels, workflow.outputs.labels] +- op: assert_close + inputs: [test_tensors.outputs.coord, workflow.outputs.coord] +- op: assert_close + inputs: [test_tensors.outputs.points, workflow.outputs.points] +- op: assert_close + inputs: [test_tensors.outputs.prob, workflow.outputs.prob] From 5645d4c292ca6887cd5353b93c0b9a5181a7cdab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fynn=20Beuttenm=C3=BCller?= Date: Fri, 28 Oct 2022 14:48:50 +0200 Subject: [PATCH 11/40] Update example_specs/workflows/hpa/single_cell_classification.yaml --- example_specs/workflows/hpa/single_cell_classification.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/example_specs/workflows/hpa/single_cell_classification.yaml b/example_specs/workflows/hpa/single_cell_classification.yaml index 56398b642..5eb10495c 100644 --- a/example_specs/workflows/hpa/single_cell_classification.yaml +++ b/example_specs/workflows/hpa/single_cell_classification.yaml @@ -23,7 +23,6 @@ outputs: steps: -- op: set_ - id: segmentation op: model_inference inputs: [inputs.nuclei] # take the first output of step 1 (id: data) as the only input From 218b67ab230203c331a3b0fe096b2d45520d6d89 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 28 Oct 2022 23:48:58 +0200 Subject: [PATCH 12/40] wip discussion with constantin --- bioimageio/spec/workflow/v0_2/schema.py | 68 ++++++++++++++++++++++--- example_specs/workflows/dummy/rdf.yaml | 47 +++++++++++++++++ tests/conftest.py | 5 ++ tests/test_workflow_rdf.py | 24 +++++++++ 4 files changed, 137 insertions(+), 7 deletions(-) create mode 100644 example_specs/workflows/dummy/rdf.yaml diff --git a/bioimageio/spec/workflow/v0_2/schema.py b/bioimageio/spec/workflow/v0_2/schema.py index 96f4a5acf..731bffa29 100644 --- a/bioimageio/spec/workflow/v0_2/schema.py +++ b/bioimageio/spec/workflow/v0_2/schema.py @@ -111,7 +111,9 @@ class Step(_BioImageIOSchema): bioimageio_description="output names for this step", required=False, ) - kwargs = fields.Kwargs(bioimageio_description="Key word arguments for op.") + kwargs = fields.Kwargs( + bioimageio_description="Key word arguments for op. \n\nWorkflow kwargs can be refered to as ${{ kwargs.\ }}. \n\nOutputs of previous steps can be referenced as ${{ \.outputs.\ }} (the previous step is required to specify `id` and `outputs`). \n\nThe workflow's `rdf_source` can be referenced as ${{ self.rdf_source }}. This will expand to the URL or file path of the workflow RDF." 
+ ) class Workflow(_BioImageIOSchema, RDF): @@ -124,7 +126,6 @@ class Workflow(_BioImageIOSchema, RDF): """ inputs = fields.List( fields.Nested(Arg()), - validate=field_validators.Length(min=1), required=True, bioimageio_description="Describes the inputs expected by this workflow.", ) @@ -178,6 +179,17 @@ def inputs_and_outputs(self, data, **kwargs): bioimageio_description="Key word arguments for this workflow.", ) + @validates("kwargs") + def unique_kwarg_names(self, kwargs): + if not isinstance(kwargs, list) or not all(isinstance(kw, raw_nodes.WorkflowKwarg) for kw in kwargs): + raise ValidationError("Invalid 'kwargs'.") + + kwarg_names = set() + for kw in kwargs: + if kw.name in kwarg_names: + raise ValidationError(f"Duplicate kwarg name '{kw.name}'.") + kwarg_names.add(kw.name) + steps = fields.List( fields.Nested(Step()), validate=field_validators.Length(min=1), @@ -185,16 +197,37 @@ def inputs_and_outputs(self, data, **kwargs): bioimageio_description="Workflow steps to be executed consecutively.", ) + @staticmethod + def get_kwarg_reference_names(data) -> typing.Set[str]: + refs: typing.Set[str] = set() + kwargs = data.get("kwargs") + if not isinstance(kwargs, list): + return refs + + for kw in kwargs: + if isinstance(kw, raw_nodes.WorkflowKwarg): + refs.add(f"${{{{ kwargs.{kw.name} }}}}") + + return refs + + @staticmethod + def get_self_reference_names() -> typing.Set[str]: + return {"${{ self.rdf_source }}"} + @validates_schema def step_input_references_exist(self, data, **kwargs): inputs = data.get("inputs") - if not inputs or not isinstance(inputs, list) or not all(isinstance(ipt, raw_nodes.Arg) for ipt in inputs): + if not isinstance(inputs, list) or not all(isinstance(ipt, raw_nodes.Arg) for ipt in inputs): raise ValidationError("Missing/invalid 'inputs'") + steps = data.get("steps") - if not steps or not isinstance(steps, list) or not isinstance(steps[0], raw_nodes.Step): + if not steps or not isinstance(steps, list) or not all(isinstance(s, raw_nodes.Step) for s in steps): raise ValidationError("Missing/invalid 'steps'") - references = {f"inputs.{ipt.name}" for ipt in inputs} + references = {f"${{{{ inputs.{ipt.name} }}}}" for ipt in inputs} + references.update(self.get_kwarg_reference_names(data)) + references.update(self.get_self_reference_names()) + for step in steps: if step.inputs: for si in step.inputs: @@ -214,10 +247,11 @@ def step_input_references_exist(self, data, **kwargs): @validates_schema def test_step_input_references_exist(self, data, **kwargs): steps = data.get("test_steps") - if not steps or not isinstance(steps, list) or not isinstance(steps[0], raw_nodes.Step): + if not steps or not isinstance(steps, list) or not all(isinstance(s, raw_nodes.Step) for s in steps): raise ValidationError("Missing/invalid 'test_steps'") - references = set() + references = self.get_kwarg_reference_names(data) + references.update(self.get_self_reference_names()) for step in steps: if step.inputs: for si in step.inputs: @@ -226,3 +260,23 @@ def test_step_input_references_exist(self, data, **kwargs): if step.outputs: references.update({f"{step.id}.outputs.{out}" for out in step.outputs}) + + @validates_schema + def test_kwarg_references_are_valid(self, data, **kwargs): + for step_type in ["steps", "test_steps"]: + steps = data.get(step_type) + if not steps or not isinstance(steps, list) or not isinstance(steps[0], raw_nodes.Step): + raise ValidationError(f"Missing/invalid '{step_type}'") + + references = self.get_kwarg_reference_names(data) + 
references.update(self.get_self_reference_names()) + for step in steps: + if step.kwargs: + for k, v in step.kwargs.items(): + if isinstance(v, str) and v.startswith("${{") and v.endswith("}}") and v not in references: + raise ValidationError( + f"Invalid {step_type[:-1].replace('_', ' ')} kwarg ({k}) referencing '{v}'" + ) + + if step.outputs: + references.update({f"${{{{ {step.id}.outputs.{out} }}}}" for out in step.outputs}) diff --git a/example_specs/workflows/dummy/rdf.yaml b/example_specs/workflows/dummy/rdf.yaml new file mode 100644 index 000000000..ce17c876d --- /dev/null +++ b/example_specs/workflows/dummy/rdf.yaml @@ -0,0 +1,47 @@ +name: dummy workflow +description: A workflow to produce some nonsense +format_version: 0.2.3 +type: workflow + +inputs: +- name: shape + type: list + default: [2, 3] +- name: threshold + type: float + default: 0.5 + +outputs: +- name: nonsense + type: tensor + +steps: +- id: step0 + op: generate_random_tensor + inputs: + shape: ${{ inputs.shape }} + dtype: float + distribution: uniform + low: 0 + high: 1 +- id: step1 + op: binarize + inputs: + threshold: ${{ inputs.threshold }} +- op: log + kwargs: + threshold: ${{ kwargs.threshold }} + original: ${{ step0.outputs.random_tensor }} + binarized: ${{ step1.outputs.binarized }} + +test_steps: +- id: wf + op: run_workflow + outputs: [out0] + kwargs: + rdf_source: ${{ self.rdf_source }} + threshold: 0.5 +- op: assert_shape + inputs: + tensor: ${{ wf.outputs.out0 }} + shape: ${{ inputs.shape }} diff --git a/tests/conftest.py b/tests/conftest.py index 62f2b74e8..21f7c9fb5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -111,3 +111,8 @@ def stardist_workflow_rdf(): @pytest.fixture def hpa_workflow_rdf(): return pathlib.Path(__file__).parent / "../example_specs/workflows/hpa/single_cell_classification.yaml" + + +@pytest.fixture +def dummy_workflow_rdf(): + return pathlib.Path(__file__).parent / "../example_specs/workflows/dummy/rdf.yaml" diff --git a/tests/test_workflow_rdf.py b/tests/test_workflow_rdf.py index dc06265b8..3a2625e1d 100644 --- a/tests/test_workflow_rdf.py +++ b/tests/test_workflow_rdf.py @@ -1,3 +1,6 @@ +import pytest +from marshmallow import ValidationError + from bioimageio.spec.shared import yaml from bioimageio.spec.workflow import raw_nodes @@ -22,3 +25,24 @@ def test_workflow_rdf_hpa_example(hpa_workflow_rdf): workflow = Workflow().load(data) assert isinstance(workflow, raw_nodes.Workflow) assert workflow.outputs[0].name == "cells" + + +def test_dummy_workflow_rdf(dummy_workflow_rdf): + from bioimageio.spec.workflow.schema import Workflow + + data = yaml.load(dummy_workflow_rdf) + + workflow = Workflow().load(data) + assert isinstance(workflow, raw_nodes.Workflow) + + +def test_invalid_kwarg_name_duplicate(dummy_workflow_rdf): + from bioimageio.spec.workflow.schema import Workflow + + data = yaml.load(dummy_workflow_rdf) + data["kwargs"].append(data["kwargs"][0]) + + with pytest.raises(ValidationError) as e: + Workflow().load(data) + + assert e.value.messages == {"kwargs": ["Duplicate kwarg name 'shape'."]} From 94d12920eb3abfb52b96ce7e78bf3a3dc7b70779 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Sat, 29 Oct 2022 00:50:00 +0200 Subject: [PATCH 13/40] wip2 --- bioimageio/spec/workflow/v0_2/raw_nodes.py | 29 ++- bioimageio/spec/workflow/v0_2/schema.py | 249 ++++++++------------- 2 files changed, 106 insertions(+), 172 deletions(-) diff --git a/bioimageio/spec/workflow/v0_2/raw_nodes.py b/bioimageio/spec/workflow/v0_2/raw_nodes.py index 63eeff3d8..ee20266d0 100644 --- 
a/bioimageio/spec/workflow/v0_2/raw_nodes.py +++ b/bioimageio/spec/workflow/v0_2/raw_nodes.py @@ -21,44 +21,43 @@ from typing_extensions import Literal, get_args # type: ignore FormatVersion = FormatVersion -ArgType = Literal["tensor", "int", "float", "string", "boolean", "list", "dict", "any"] +ParameterType = Literal["tensor", "int", "float", "string", "boolean", "list", "dict", "any"] DefaultType = Union[int, float, str, bool, list, dict, None] TYPE_NAME_MAP = {int: "int", float: "float", str: "string", bool: "boolean", list: "list", dict: "dict", None: "null"} @dataclass -class Arg(RawNode): +class Parameter(RawNode): name: str = missing - type: ArgType = missing - default: Union[_Missing, DefaultType] = missing + type: ParameterType = missing description: Union[_Missing, str] = missing + axes: Union[_Missing, str] = missing @dataclass -class WorkflowKwarg(RawNode): - name: str = missing - type: ArgType = missing - default: DefaultType = missing - description: Union[_Missing, str] = missing +class Input(Parameter): + default: Union[_Missing, DefaultType] = missing + + +@dataclass +class Output(Parameter): + pass @dataclass class Step(RawNode): id: Union[_Missing, str] = missing op: str = missing - inputs: Union[_Missing, List[str]] = missing + inputs: Union[_Missing, List[Any], Dict[str, Any]] = missing outputs: Union[_Missing, List[str]] = missing - kwargs: Union[_Missing, Dict[str, Any]] = missing @dataclass class Workflow(_RDF): type: Literal["workflow"] = missing - inputs: List[Arg] = missing - outputs: List[Arg] = missing + inputs: List[Input] = missing + outputs: List[Output] = missing steps: List[Step] = missing test_steps: List[Step] = missing - - kwargs: Union[_Missing, List[WorkflowKwarg]] = missing diff --git a/bioimageio/spec/workflow/v0_2/schema.py b/bioimageio/spec/workflow/v0_2/schema.py index 731bffa29..b85bef138 100644 --- a/bioimageio/spec/workflow/v0_2/schema.py +++ b/bioimageio/spec/workflow/v0_2/schema.py @@ -17,102 +17,95 @@ class _BioImageIOSchema(SharedBioImageIOSchema): raw_nodes = raw_nodes -class Arg(_BioImageIOSchema): +class Parameter(_BioImageIOSchema): name = fields.String( required=True, - bioimageio_description="Argument/tensor name. No duplicates are allowed.", + bioimageio_description="Parameter name. No duplicates are allowed.", ) type = fields.String( required=True, - validate=field_validators.OneOf(get_args(raw_nodes.ArgType)), - bioimageio_description=f"Argument type. One of: {get_args(raw_nodes.ArgType)}", + validate=field_validators.OneOf(get_args(raw_nodes.ParameterType)), + bioimageio_description=f"Parameter type. 
One of: {get_args(raw_nodes.ParameterType)}", ) - default = fields.Raw( + axes = fields.Axes( required=False, - bioimageio_description="Default value compatible with type given by `type` field.", - allow_none=True, + bioimageio_maybe_required=True, + bioimageio_description="[only applicable if type is 'tensor'] one letter out of 'bitczyx' per tensor dimension", + valid_axes="bitczyx", ) + description = fields.String(bioimageio_description="Description of parameter.") @validates_schema - def default_has_compatible_type(self, data, **kwargs): - if data.get("default") is None: - return - - arg_type_name = data.get("type") - if arg_type_name == "any": - return - - default_type = type(data["default"]) - type_name = raw_nodes.TYPE_NAME_MAP[default_type] - if type_name != arg_type_name: - raise ValidationError( - f"Default value of type {default_type} (type name: {type_name}) does not match type: {arg_type_name}" - ) + def has_axes_if_tensor(self, data, **kwargs): + ipt_type = data.get("type") + axes = data.get("axes") + if ipt_type == "tensor" and axes is None: + raise ValidationError("'axes' required for input type 'tensor'.") - description = fields.String(bioimageio_description="Description of argument/tensor.") - -class WorkflowKwarg(_BioImageIOSchema): - name = fields.String( - required=True, - bioimageio_description="Key word argument name. No duplicates are allowed.", - ) - type = fields.String( - required=True, - validate=field_validators.OneOf(get_args(raw_nodes.ArgType)), - bioimageio_description=f"Argument type. One of: {get_args(raw_nodes.ArgType)}", - ) +class Input(Parameter): default = fields.Raw( - required=True, - bioimageio_description="Default value compatible with type given by `type` field.", + required=False, + bioimageio_description="Default value compatible with type given by `type` field." + "\n\nThe `null` value is compatible with any specified type.", allow_none=True, ) @validates_schema def default_has_compatible_type(self, data, **kwargs): if data.get("default") is None: + # no default or always valid default of None return - arg_type_name = data.get("type") - if arg_type_name == "any": + input_type_name = data.get("type") + if input_type_name == "any": return default_type = type(data["default"]) type_name = raw_nodes.TYPE_NAME_MAP[default_type] - if type_name != arg_type_name: + if type_name != input_type_name: raise ValidationError( - f"Default value of type {default_type} (type name: {type_name}) does not match type: {arg_type_name}" + f"Default value of type {default_type} (type name: {type_name}) does not match type: {input_type_name}" ) - description = fields.String(required=False, bioimageio_description="Description of key word argument.") + +class Output(Parameter): + pass class Step(_BioImageIOSchema): id = fields.String( required=False, + bioimageio_maybe_required=True, validate=field_validators.Predicate("isidentifier"), bioimageio_description="Step id for referencing the steps' kwargs or outputs.", ) + + @validates_schema + def has_id_if_outputs(self, data, **kwargs): + if data.get("outputs") and "id" not in data: + raise ValidationError("'id' required if 'outputs' are named.") + op = fields.String( required=True, validate=field_validators.Predicate("isidentifier"), bioimageio_description="Name of operation. 
Must be implemented in bioimageio.core or bioimageio.contrib.", ) - inputs = fields.List( - fields.String( - bioimageio_description="named output of a previous step with the pattern '.outputs.'", - ), - required=False, + inputs = fields.Union( + [ + fields.List(fields.Raw()), + fields.YamlDict(fields.String(validate=field_validators.Predicate("isidentifier")), fields.Raw()), + ], + bioimageio_description="Either a list of input parameters (named parameters as dict with one entry after all positional parameters)." + "\n\nOr a dictionary of named parameters." + "\n\nIf not set the outputs of the previous step are used as positional input parameters.", ) outputs = fields.List( fields.String( validate=field_validators.Predicate("isidentifier"), ), - bioimageio_description="output names for this step", required=False, - ) - kwargs = fields.Kwargs( - bioimageio_description="Key word arguments for op. \n\nWorkflow kwargs can be refered to as ${{ kwargs.\ }}. \n\nOutputs of previous steps can be referenced as ${{ \.outputs.\ }} (the previous step is required to specify `id` and `outputs`). \n\nThe workflow's `rdf_source` can be referenced as ${{ self.rdf_source }}. This will expand to the URL or file path of the workflow RDF." + bioimageio_description="Output names for this step.", ) @@ -123,90 +116,54 @@ class Workflow(_BioImageIOSchema, RDF): The workflow RDF YAML file contains mandatory and optional fields. In the following description, optional fields are indicated by _optional_. _optional*_ with an asterisk indicates the field is optional depending on the value in another field. + """ inputs = fields.List( - fields.Nested(Arg()), + fields.Nested(Input()), required=True, bioimageio_description="Describes the inputs expected by this workflow.", ) - @validates("inputs") - def no_duplicate_input_names(self, value: typing.List[raw_nodes.Arg]): - if not isinstance(value, list) or not all(isinstance(v, raw_nodes.Arg) for v in value): - raise ValidationError("Could not check for duplicate input names due to another validation error.") + @staticmethod + def verify_param_list(params: typing.Any) -> typing.List[typing.Union[raw_nodes.Parameter]]: + if not isinstance(params, list) or not all(isinstance(v, raw_nodes.Parameter) for v in params): + raise ValidationError("Could not check for duplicate parameter names due to another validation error.") + + return params + + @staticmethod + def check_for_duplicate_param_names(params: typing.List[typing.Union[raw_nodes.Parameter]]): + names = set() + for t in params: + if t.name in names: + raise ValidationError(f"Duplicate parameter name '{t.name}' not allowed.") - names = [t.name for t in value] - if len(names) > len(set(names)): - raise ValidationError("Duplicate input names are not allowed.") + names.add(t.name) + + @validates("inputs") + def no_duplicate_input_names(self, ipts: typing.List[raw_nodes.Input]): + self.check_for_duplicate_param_names(self.verify_param_list(ipts)) outputs = fields.List( - fields.Nested(Arg()), + fields.Nested(Output()), validate=field_validators.Length(min=1), bioimageio_description="Describes the outputs from this workflow.", ) @validates("outputs") - def no_duplicate_output_names(self, value: typing.List[raw_nodes.Arg]): - if not isinstance(value, list) or not all(isinstance(v, raw_nodes.Arg) for v in value): - raise ValidationError("Could not check for duplicate output names due to another validation error.") - - names = [t["name"] if isinstance(t, dict) else t.name for t in value] - if len(names) > len(set(names)): - raise 
ValidationError("Duplicate output names are not allowed.") - - @validates_schema - def inputs_and_outputs(self, data, **kwargs): - ipts: typing.List[raw_nodes.Arg] = data.get("inputs") - outs: typing.List[raw_nodes.Arg] = data.get("outputs") - if any( - [ - not isinstance(ipts, list), - not isinstance(outs, list), - not all(isinstance(v, raw_nodes.Arg) for v in ipts), - not all(isinstance(v, raw_nodes.Arg) for v in outs), - ] - ): - raise ValidationError("Could not check for duplicate names due to another validation error.") - - # no duplicate names - names = [t.name for t in ipts + outs] # type: ignore - if len(names) > len(set(names)): - raise ValidationError("Duplicate names are not allowed.") - - kwargs = fields.List( - fields.Nested(WorkflowKwarg()), - required=False, - bioimageio_description="Key word arguments for this workflow.", - ) - - @validates("kwargs") - def unique_kwarg_names(self, kwargs): - if not isinstance(kwargs, list) or not all(isinstance(kw, raw_nodes.WorkflowKwarg) for kw in kwargs): - raise ValidationError("Invalid 'kwargs'.") - - kwarg_names = set() - for kw in kwargs: - if kw.name in kwarg_names: - raise ValidationError(f"Duplicate kwarg name '{kw.name}'.") - kwarg_names.add(kw.name) - - steps = fields.List( - fields.Nested(Step()), - validate=field_validators.Length(min=1), - required=True, - bioimageio_description="Workflow steps to be executed consecutively.", - ) + def no_duplicate_output_names(self, outs: typing.List[raw_nodes.Output]): + self.check_for_duplicate_param_names(self.verify_param_list(outs)) @staticmethod - def get_kwarg_reference_names(data) -> typing.Set[str]: + def get_input_reference_names(data) -> typing.Set[str]: refs: typing.Set[str] = set() - kwargs = data.get("kwargs") - if not isinstance(kwargs, list): + inputs = data.get("inputs") + if not isinstance(inputs, list): return refs - for kw in kwargs: - if isinstance(kw, raw_nodes.WorkflowKwarg): - refs.add(f"${{{{ kwargs.{kw.name} }}}}") + for ipt in inputs: + if isinstance(ipt, raw_nodes.Input): + refs.add(f"${{{{ inputs.{ipt.name} }}}}") return refs @@ -214,29 +171,6 @@ def get_kwarg_reference_names(data) -> typing.Set[str]: def get_self_reference_names() -> typing.Set[str]: return {"${{ self.rdf_source }}"} - @validates_schema - def step_input_references_exist(self, data, **kwargs): - inputs = data.get("inputs") - if not isinstance(inputs, list) or not all(isinstance(ipt, raw_nodes.Arg) for ipt in inputs): - raise ValidationError("Missing/invalid 'inputs'") - - steps = data.get("steps") - if not steps or not isinstance(steps, list) or not all(isinstance(s, raw_nodes.Step) for s in steps): - raise ValidationError("Missing/invalid 'steps'") - - references = {f"${{{{ inputs.{ipt.name} }}}}" for ipt in inputs} - references.update(self.get_kwarg_reference_names(data)) - references.update(self.get_self_reference_names()) - - for step in steps: - if step.inputs: - for si in step.inputs: - if si not in references: - raise ValidationError(f"Invalid step input reference '{si}'") - - if step.outputs: - references.update({f"{step.id}.outputs.{out}" for out in step.outputs}) - test_steps = fields.List( fields.Nested(Step()), validate=field_validators.Length(min=1), @@ -245,33 +179,34 @@ def step_input_references_exist(self, data, **kwargs): ) @validates_schema - def test_step_input_references_exist(self, data, **kwargs): - steps = data.get("test_steps") - if not steps or not isinstance(steps, list) or not all(isinstance(s, raw_nodes.Step) for s in steps): - raise ValidationError("Missing/invalid 
'test_steps'") - - references = self.get_kwarg_reference_names(data) - references.update(self.get_self_reference_names()) - for step in steps: - if step.inputs: - for si in step.inputs: - if si not in references: - raise ValidationError(f"Invalid test step input reference '{si}'") - - if step.outputs: - references.update({f"{step.id}.outputs.{out}" for out in step.outputs}) - - @validates_schema - def test_kwarg_references_are_valid(self, data, **kwargs): + def step_inputs_are_valid(self, data, **kwargs): for step_type in ["steps", "test_steps"]: steps = data.get(step_type) if not steps or not isinstance(steps, list) or not isinstance(steps[0], raw_nodes.Step): raise ValidationError(f"Missing/invalid '{step_type}'") - references = self.get_kwarg_reference_names(data) + references = self.get_input_reference_names(data) references.update(self.get_self_reference_names()) for step in steps: - if step.kwargs: + if isinstance(step.inputs, list): + for si in step.inputs: + if isinstance(si, str) and si.startswith("${{") and si.endwith("}}") and si not in references: + raise ValidationError(f"Invalid reference '{si}'") + elif isinstance(si, dict) and len(si) == 1: + si_ref = list(si.values())[0] + if si_ref not in references: + raise ValidationError(f"Invalid reference '{si_ref}'") + + elif isinstance(step.inputs, dict): + for key, value in step.inputs.values(): + if key.startswith("${{") and key.endswith("}}"): + raise ValidationError("Invalid input name. (no reference allowed here)") + if value.startswith("${{") and value.endswith("}}") and value not in references: + raise ValidationError(f"Invalid reference '{value}'") + + if step.outputs: + references.update({f"{step.id}.outputs.{out}" for out in step.outputs}) + for k, v in step.kwargs.items(): if isinstance(v, str) and v.startswith("${{") and v.endswith("}}") and v not in references: raise ValidationError( From 428d6059f7700f65e22dd931f59a1b688ef2c68b Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 31 Oct 2022 20:34:24 +0100 Subject: [PATCH 14/40] axes and options --- bioimageio/spec/workflow/v0_2/raw_nodes.py | 6 ++++ example_specs/workflows/dummy/rdf.yaml | 32 ++++++++++++++-------- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/bioimageio/spec/workflow/v0_2/raw_nodes.py b/bioimageio/spec/workflow/v0_2/raw_nodes.py index ee20266d0..f46dd4ef8 100644 --- a/bioimageio/spec/workflow/v0_2/raw_nodes.py +++ b/bioimageio/spec/workflow/v0_2/raw_nodes.py @@ -36,6 +36,11 @@ class Parameter(RawNode): @dataclass class Input(Parameter): + pass + + +@dataclass +class Option(Parameter): default: Union[_Missing, DefaultType] = missing @@ -57,6 +62,7 @@ class Workflow(_RDF): type: Literal["workflow"] = missing inputs: List[Input] = missing + options: List[Option] = missing outputs: List[Output] = missing steps: List[Step] = missing diff --git a/example_specs/workflows/dummy/rdf.yaml b/example_specs/workflows/dummy/rdf.yaml index ce17c876d..86e3e887c 100644 --- a/example_specs/workflows/dummy/rdf.yaml +++ b/example_specs/workflows/dummy/rdf.yaml @@ -6,7 +6,10 @@ type: workflow inputs: - name: shape type: list - default: [2, 3] +- name: axes + type: str + +options: - name: threshold type: float default: 0.5 @@ -17,30 +20,37 @@ outputs: steps: - id: step0 - op: generate_random_tensor + op: generate_random_uniform_tensor inputs: shape: ${{ inputs.shape }} - dtype: float - distribution: uniform - low: 0 - high: 1 + axes: ${{ inputs.axes }} +# options: +# low: 0 +# high: 1 - id: step1 op: binarize inputs: threshold: ${{ inputs.threshold }} - op: 
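As an aside (not part of the patch series): the reference convention these step validators enforce is easy to state in isolation. Below is a minimal, self-contained sketch; the function names are invented for illustration, and the real checks live in the marshmallow validators above, which additionally grow the set of known references with each step's named outputs.

    from typing import Iterable, Set

    def is_reference(value: object) -> bool:
        # At this point in the series, a step input counts as a reference
        # if it is a string wrapped in '${{ ... }}'.
        return isinstance(value, str) and value.startswith("${{") and value.endswith("}}")

    def check_references(values: Iterable[object], known: Set[str]) -> None:
        # Every reference must already be known (a workflow input, the
        # workflow's own rdf_source, or an earlier step's named output);
        # plain values pass through untouched.
        for value in values:
            if is_reference(value) and value not in known:
                raise ValueError(f"Invalid reference {value!r}")

    known = {"${{ inputs.shape }}", "${{ self.rdf_source }}"}
    check_references(["${{ inputs.shape }}", 0.5], known)        # passes
    # check_references(["${{ step0.outputs.missing }}"], known)  # would raise ValueError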
log - kwargs: + options: + log_level: 50 threshold: ${{ kwargs.threshold }} original: ${{ step0.outputs.random_tensor }} - binarized: ${{ step1.outputs.binarized }} +# binarized: ${{ step1.outputs.binarized }} + test_steps: - id: wf op: run_workflow - outputs: [out0] - kwargs: + inputs: rdf_source: ${{ self.rdf_source }} - threshold: 0.5 + options: + inputs: + shape: [2, 3] + axes: cx + options: + threshold: 0.5 + outputs: [out0] - op: assert_shape inputs: tensor: ${{ wf.outputs.out0 }} From 6dacb68f4f59e7459caae8134a33b1b326798312 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 3 Nov 2022 16:59:13 +0100 Subject: [PATCH 15/40] update workflow RDF schema and raw_nodes --- bioimageio/spec/shared/field_validators.py | 2 + bioimageio/spec/workflow/v0_2/raw_nodes.py | 77 +++++++- bioimageio/spec/workflow/v0_2/schema.py | 216 +++++++++++++++------ 3 files changed, 236 insertions(+), 59 deletions(-) diff --git a/bioimageio/spec/shared/field_validators.py b/bioimageio/spec/shared/field_validators.py index f6012d786..61aecfde4 100644 --- a/bioimageio/spec/shared/field_validators.py +++ b/bioimageio/spec/shared/field_validators.py @@ -4,6 +4,7 @@ ContainsNoneOf, Equal, Length, + NoneOf, OneOf, Predicate as MarshmallowPredicate, Range, @@ -15,6 +16,7 @@ ContainsNoneOf = ContainsNoneOf Equal = Equal Length = Length +NoneOf = NoneOf OneOf = OneOf Range = Range diff --git a/bioimageio/spec/workflow/v0_2/raw_nodes.py b/bioimageio/spec/workflow/v0_2/raw_nodes.py index f46dd4ef8..fbe00e0dd 100644 --- a/bioimageio/spec/workflow/v0_2/raw_nodes.py +++ b/bioimageio/spec/workflow/v0_2/raw_nodes.py @@ -25,13 +25,82 @@ DefaultType = Union[int, float, str, bool, list, dict, None] TYPE_NAME_MAP = {int: "int", float: "float", str: "string", bool: "boolean", list: "list", dict: "dict", None: "null"} +# unit names from https://ngff.openmicroscopy.org/latest/#axes-md +SpaceUnit = Literal[ + "angstrom", + "attometer", + "centimeter", + "decimeter", + "exameter", + "femtometer", + "foot", + "gigameter", + "hectometer", + "inch", + "kilometer", + "megameter", + "meter", + "micrometer", + "mile", + "millimeter", + "nanometer", + "parsec", + "petameter", + "picometer", + "terameter", + "yard", + "yoctometer", + "yottameter", + "zeptometer", + "zettameter", +] + +TimeUnit = Literal[ + "attosecond", + "centisecond", + "day", + "decisecond", + "exasecond", + "femtosecond", + "gigasecond", + "hectosecond", + "hour", + "kilosecond", + "megasecond", + "microsecond", + "millisecond", + "minute", + "nanosecond", + "petasecond", + "picosecond", + "second", + "terasecond", + "yoctosecond", + "yottasecond", + "zeptosecond", + "zettasecond", +] + +# this Axis definition is compatible with the NGFF draft from October 24, 2022 +# https://ngff.openmicroscopy.org/latest/#axes-md +AxisType = Literal["batch", "channel", "index", "time", "space"] + + +@dataclass +class Axis: + name: str = missing + type: AxisType = missing + description: Union[_Missing, str] = missing + unit: Union[_Missing, SpaceUnit, TimeUnit, str] = missing + step: Union[_Missing, int] = missing + @dataclass class Parameter(RawNode): name: str = missing type: ParameterType = missing description: Union[_Missing, str] = missing - axes: Union[_Missing, str] = missing + axes: Union[_Missing, List[Axis]] = missing @dataclass @@ -51,9 +120,10 @@ class Output(Parameter): @dataclass class Step(RawNode): - id: Union[_Missing, str] = missing op: str = missing - inputs: Union[_Missing, List[Any], Dict[str, Any]] = missing + id: Union[_Missing, str] = missing + inputs: 
Union[_Missing, List[Any]] = missing + options: Union[_Missing, Dict[str, Any]] = missing outputs: Union[_Missing, List[str]] = missing @@ -64,6 +134,5 @@ class Workflow(_RDF): inputs: List[Input] = missing options: List[Option] = missing outputs: List[Output] = missing - steps: List[Step] = missing test_steps: List[Step] = missing diff --git a/bioimageio/spec/workflow/v0_2/schema.py b/bioimageio/spec/workflow/v0_2/schema.py index b85bef138..c6de875a4 100644 --- a/bioimageio/spec/workflow/v0_2/schema.py +++ b/bioimageio/spec/workflow/v0_2/schema.py @@ -1,4 +1,5 @@ import typing +import warnings from marshmallow import ValidationError, missing, validates, validates_schema @@ -17,6 +18,95 @@ class _BioImageIOSchema(SharedBioImageIOSchema): raw_nodes = raw_nodes +class Axis(SharedBioImageIOSchema): + name = fields.String( + required=True, + bioimageio_description="A unique axis name (max 32 characters).", + validate=field_validators.Length(max=32), + ) + type = fields.String( + required=True, + validate=field_validators.OneOf(get_args(raw_nodes.AxisType)), + bioimageio_description=f"One of: {get_args(raw_nodes.AxisType)}", + ) + description = fields.String( + validate=field_validators.Length(max=128), + bioimageio_description="Description of axis (max 128 characters).", + ) + unit = fields.String(bioimageio_description="Physical unit of this axis.", bioimageio_maybe_required=True) + # Recommendations:\n\n for type: 'space' one of:\n\n\t{get_args(raw_nodes.SpaceUnit)}\n\n for type: 'time' one of:\n\n\t{get_args(raw_nodes.TimeUnit)}") + step = fields.Integer( + bioimageio_description="One 'pixel' along this axis corresponds to 'step'+'unit'. If specified 'unit' is mandatory." + ) + + @validates_schema + def step_has_unit(self, data, **kwargs): + if "step" in data and not "unit" in data: + raise ValidationError("Missing 'unit' for specified 'step'.", "unit") + + +class BatchAxis(Axis): + class Meta: + exclude = ("name", "description", "unit", "step") + + type = fields.String(required=True, validate=field_validators.Equal("batch"), bioimageio_description="'batch'") + + +class ChannelAxis(Axis): + class Meta: + exclude = ("step",) + + type = fields.String(required=True, validate=field_validators.Equal("channel"), bioimageio_description="'channel'") + name = fields.Union( + [ + fields.List(fields.String(validate=field_validators.Length(max=32))), + fields.String(validate=field_validators.Length(max=32)), + ], + required=True, + bioimageio_description="A unique axis name (max 32 characters; per channel if list).", + ) + unit = fields.Union( + [ + fields.List(fields.String(validate=field_validators.Length(max=32))), + fields.String(validate=field_validators.Length(max=32)), + ], + required=True, + bioimageio_description="Physical unit of data values (max 32 characters; per channel if list).", + ) + + +class IndexAxis(Axis): + class Meta: + exclude = ("step", "unit") + + type = fields.String(required=True, validate=field_validators.Equal("index"), bioimageio_description="'index'") + + +class SpaceAxis(Axis): + name = fields.String( + validate=field_validators.OneOf(["x", "y", "z"]), + required=True, + bioimageio_description="One of: ['x', 'y', 'z'].", + ) + type = fields.String(required=True, validate=field_validators.Equal("space"), bioimageio_description="'space'") + + @validates("unit") + def recommend_unit(self, value: str): + recommended_units = get_args(raw_nodes.SpaceUnit) + if not value in recommended_units: + self.warn("unit", f"unknown space unit {value}. 
Recommend units are: {recommended_units}") + + +class TimeAxis(Axis): + type = fields.String(required=True, validate=field_validators.Equal("time"), bioimageio_description="'time'") + + @validates("unit") + def recommend_unit(self, value: str): + recommended_units = get_args(raw_nodes.TimeUnit) + if not value in recommended_units: + self.warn("unit", f"unknown time unit {value}. Recommend units are: {recommended_units}") + + class Parameter(_BioImageIOSchema): name = fields.String( required=True, @@ -25,15 +115,25 @@ class Parameter(_BioImageIOSchema): type = fields.String( required=True, validate=field_validators.OneOf(get_args(raw_nodes.ParameterType)), - bioimageio_description=f"Parameter type. One of: {get_args(raw_nodes.ParameterType)}", + bioimageio_description=f"One of: {get_args(raw_nodes.ParameterType)}", ) - axes = fields.Axes( + axes = fields.List( + fields.Union( + [ + fields.Nested(BatchAxis()), + fields.Nested(ChannelAxis()), + fields.Nested(IndexAxis()), + fields.Nested(SpaceAxis()), + fields.Nested(TimeAxis()), + ] + ), required=False, bioimageio_maybe_required=True, - bioimageio_description="[only applicable if type is 'tensor'] one letter out of 'bitczyx' per tensor dimension", - valid_axes="bitczyx", + bioimageio_description="Axis specifications (only required for type 'tensor').", + ) + description = fields.String( + bioimageio_description="Description (max 128 characters).", validate=field_validators.Length(max=128) ) - description = fields.String(bioimageio_description="Description of parameter.") @validates_schema def has_axes_if_tensor(self, data, **kwargs): @@ -44,8 +144,12 @@ def has_axes_if_tensor(self, data, **kwargs): class Input(Parameter): + pass + + +class Option(Parameter): default = fields.Raw( - required=False, + required=True, bioimageio_description="Default value compatible with type given by `type` field." "\n\nThe `null` value is compatible with any specified type.", allow_none=True, @@ -74,11 +178,16 @@ class Output(Parameter): class Step(_BioImageIOSchema): + op = fields.String( + required=True, + validate=field_validators.Predicate("isidentifier"), + bioimageio_description="Name of operation. Must be implemented in bioimageio.core or bioimageio.contrib.", + ) id = fields.String( required=False, bioimageio_maybe_required=True, - validate=field_validators.Predicate("isidentifier"), - bioimageio_description="Step id for referencing the steps' kwargs or outputs.", + validate=[field_validators.Predicate("isidentifier"), field_validators.NoneOf(["self"])], + bioimageio_description="Step id for referencing the steps' outputs (must not be 'self').", ) @validates_schema @@ -86,26 +195,17 @@ def has_id_if_outputs(self, data, **kwargs): if data.get("outputs") and "id" not in data: raise ValidationError("'id' required if 'outputs' are named.") - op = fields.String( - required=True, - validate=field_validators.Predicate("isidentifier"), - bioimageio_description="Name of operation. Must be implemented in bioimageio.core or bioimageio.contrib.", - ) - inputs = fields.Union( - [ - fields.List(fields.Raw()), - fields.YamlDict(fields.String(validate=field_validators.Predicate("isidentifier")), fields.Raw()), - ], - bioimageio_description="Either a list of input parameters (named parameters as dict with one entry after all positional parameters)." - "\n\nOr a dictionary of named parameters." 
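For orientation (illustration only, not part of the patch): the 'axes' field of a tensor parameter defined above would accept a list along these lines. The axis names, channel names and the intensity unit are invented for the example; 'micrometer' is one of the recommended space units.

    # Hypothetical axes description for a batched, two-channel 2D image tensor,
    # written as the Python data structure the axis schemas deserialize.
    axes = [
        {"type": "batch"},  # batch axes carry no name or unit
        {"type": "channel", "name": ["nuclei", "protein"], "unit": "arbitrary intensity"},
        {"type": "space", "name": "y", "unit": "micrometer"},
        {"type": "space", "name": "x", "unit": "micrometer"},
    ]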
- "\n\nIf not set the outputs of the previous step are used as positional input parameters.", + inputs = fields.List( + fields.Raw(), + bioimageio_description="A list of input parameters. Named outputs of previous steps may be referenced as '${{ \\.outputs.\\ }}'" + "\n\nIf not set, the outputs of the previous step are used as inputs.", ) outputs = fields.List( fields.String( validate=field_validators.Predicate("isidentifier"), ), required=False, - bioimageio_description="Output names for this step.", + bioimageio_description="Output names for this step for later referencing.", ) @@ -132,44 +232,64 @@ def verify_param_list(params: typing.Any) -> typing.List[typing.Union[raw_nodes. return params @staticmethod - def check_for_duplicate_param_names(params: typing.List[typing.Union[raw_nodes.Parameter]]): + def check_for_duplicate_param_names(params: typing.List[typing.Union[raw_nodes.Parameter]], param_name: str): names = set() for t in params: + if not isinstance(t, raw_nodes.Parameter): + raise ValidationError( + f"Could not check for duplicate {param_name} name due to other validation errors." + ) + if t.name in names: - raise ValidationError(f"Duplicate parameter name '{t.name}' not allowed.") + raise ValidationError(f"Duplicate {param_name} name '{t.name}' not allowed.") names.add(t.name) - @validates("inputs") - def no_duplicate_input_names(self, ipts: typing.List[raw_nodes.Input]): - self.check_for_duplicate_param_names(self.verify_param_list(ipts)) + options = fields.List( + fields.Nested(Option()), + required=True, + bioimageio_description="Describes the options that may be given to this workflow.", + ) + + @validates_schema + def no_duplicate_input_and_option_names(self, data, **kwargs): + if not isinstance(data, dict): + return + ipts = data.get("inputs", []) + opts = data.get("options", []) + if isinstance(ipts, list) and isinstance(opts, list): + self.check_for_duplicate_param_names(self.verify_param_list(ipts + opts), "input/option") outputs = fields.List( fields.Nested(Output()), validate=field_validators.Length(min=1), - bioimageio_description="Describes the outputs from this workflow.", + bioimageio_description="Describes the outputs of this workflow.", ) @validates("outputs") def no_duplicate_output_names(self, outs: typing.List[raw_nodes.Output]): - self.check_for_duplicate_param_names(self.verify_param_list(outs)) + self.check_for_duplicate_param_names(self.verify_param_list(outs), "output") @staticmethod - def get_input_reference_names(data) -> typing.Set[str]: - refs: typing.Set[str] = set() + def get_initial_reference_names(data) -> typing.Set[str]: + refs = {"${{ self.rdf_source }}"} inputs = data.get("inputs") if not isinstance(inputs, list): return refs for ipt in inputs: if isinstance(ipt, raw_nodes.Input): - refs.add(f"${{{{ inputs.{ipt.name} }}}}") + refs.add(f"${{{{ self.inputs.{ipt.name} }}}}") - return refs + options = data.get("options") + if not isinstance(options, list): + return refs - @staticmethod - def get_self_reference_names() -> typing.Set[str]: - return {"${{ self.rdf_source }}"} + for opt in options: + if isinstance(opt, raw_nodes.Option): + refs.add(f"${{{{ self.options.{opt.name} }}}}") + + return refs test_steps = fields.List( fields.Nested(Step()), @@ -179,38 +299,24 @@ def get_self_reference_names() -> typing.Set[str]: ) @validates_schema - def step_inputs_are_valid(self, data, **kwargs): + def step_inputs_and_options_are_valid(self, data, **kwargs): for step_type in ["steps", "test_steps"]: steps = data.get(step_type) if not steps or not 
isinstance(steps, list) or not isinstance(steps[0], raw_nodes.Step): raise ValidationError(f"Missing/invalid '{step_type}'") - references = self.get_input_reference_names(data) - references.update(self.get_self_reference_names()) + references = self.get_initial_reference_names(data) for step in steps: if isinstance(step.inputs, list): for si in step.inputs: if isinstance(si, str) and si.startswith("${{") and si.endwith("}}") and si not in references: raise ValidationError(f"Invalid reference '{si}'") - elif isinstance(si, dict) and len(si) == 1: - si_ref = list(si.values())[0] - if si_ref not in references: - raise ValidationError(f"Invalid reference '{si_ref}'") - - elif isinstance(step.inputs, dict): - for key, value in step.inputs.values(): - if key.startswith("${{") and key.endswith("}}"): - raise ValidationError("Invalid input name. (no reference allowed here)") - if value.startswith("${{") and value.endswith("}}") and value not in references: - raise ValidationError(f"Invalid reference '{value}'") - - if step.outputs: - references.update({f"{step.id}.outputs.{out}" for out in step.outputs}) - for k, v in step.kwargs.items(): + if step.options: + for k, v in step.options.items(): if isinstance(v, str) and v.startswith("${{") and v.endswith("}}") and v not in references: raise ValidationError( - f"Invalid {step_type[:-1].replace('_', ' ')} kwarg ({k}) referencing '{v}'" + f"Invalid {step_type[:-1].replace('_', ' ')} option ({k}) referencing '{v}'" ) if step.outputs: From 97688488131fba0d4c57a4df65cdc70c54cb0ae1 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 3 Nov 2022 21:31:21 +0100 Subject: [PATCH 16/40] finish first draft of workflow RDF spec --- bioimageio/spec/workflow/v0_2/raw_nodes.py | 41 ++++++++++- bioimageio/spec/workflow/v0_2/schema.py | 33 ++++++--- example_specs/workflows/dummy/rdf.yaml | 33 ++++----- .../hpa/single_cell_classification.yaml | 61 ++++++++++++---- .../workflows/stardist/stardist_example.yaml | 72 ++++++++++++------- tests/test_workflow_rdf.py | 6 +- 6 files changed, 174 insertions(+), 72 deletions(-) diff --git a/bioimageio/spec/workflow/v0_2/raw_nodes.py b/bioimageio/spec/workflow/v0_2/raw_nodes.py index fbe00e0dd..8bbcc813b 100644 --- a/bioimageio/spec/workflow/v0_2/raw_nodes.py +++ b/bioimageio/spec/workflow/v0_2/raw_nodes.py @@ -88,13 +88,50 @@ @dataclass class Axis: - name: str = missing type: AxisType = missing + name: Union[_Missing, str, List[str]] = missing description: Union[_Missing, str] = missing - unit: Union[_Missing, SpaceUnit, TimeUnit, str] = missing + unit: Union[_Missing, SpaceUnit, TimeUnit, str, List[str]] = missing step: Union[_Missing, int] = missing +@dataclass +class BatchAxis(Axis): + type: Literal["batch"] = "batch" + name: _Missing = missing + description: _Missing = missing + unit: _Missing = missing + step: _Missing = missing + + +@dataclass +class ChannelAxis(Axis): + type: Literal["channel"] = "channel" + step: _Missing = missing + + +@dataclass +class IndexAxis(Axis): + type: Literal["index"] = "index" + name: Union[_Missing, str] = missing + unit: Union[_Missing, str] = missing + step: _Missing = missing + + +@dataclass +class SpaceAxis(Axis): + type: Literal["space"] = "space" + name: Literal["x", "y", "z"] = missing + unit: Union[_Missing, str, SpaceUnit] = missing + + +@dataclass +class TimeAxis(Axis): + type: Literal["time"] = "time" + name: Union[_Missing, str] = missing + unit: Union[_Missing, str, TimeUnit] = missing + + @dataclass class Parameter(RawNode): name: str = missing diff --git 
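The same axes can also be constructed directly as the raw-node dataclasses declared above, assuming a checkout of this branch is importable (sketch only):

    from bioimageio.spec.workflow.v0_2.raw_nodes import BatchAxis, ChannelAxis, SpaceAxis

    # 'type' is fixed by a per-class default, so only the remaining fields are given.
    axes = [
        BatchAxis(),
        ChannelAxis(name=["nuclei", "protein"]),
        SpaceAxis(name="y", unit="micrometer"),
        SpaceAxis(name="x", unit="micrometer"),
    ]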
a/bioimageio/spec/workflow/v0_2/schema.py b/bioimageio/spec/workflow/v0_2/schema.py index c6de875a4..065c9a2ae 100644 --- a/bioimageio/spec/workflow/v0_2/schema.py +++ b/bioimageio/spec/workflow/v0_2/schema.py @@ -1,7 +1,7 @@ import typing -import warnings -from marshmallow import ValidationError, missing, validates, validates_schema +from marshmallow import ValidationError, validates, validates_schema +from marshmallow.exceptions import SCHEMA from bioimageio.spec.rdf.v0_2.schema import RDF from bioimageio.spec.shared import field_validators, fields @@ -18,7 +18,7 @@ class _BioImageIOSchema(SharedBioImageIOSchema): raw_nodes = raw_nodes -class Axis(SharedBioImageIOSchema): +class Axis(_BioImageIOSchema): name = fields.String( required=True, bioimageio_description="A unique axis name (max 32 characters).", @@ -70,7 +70,7 @@ class Meta: fields.List(fields.String(validate=field_validators.Length(max=32))), fields.String(validate=field_validators.Length(max=32)), ], - required=True, + required=False, bioimageio_description="Physical unit of data values (max 32 characters; per channel if list).", ) @@ -197,9 +197,15 @@ def has_id_if_outputs(self, data, **kwargs): inputs = fields.List( fields.Raw(), - bioimageio_description="A list of input parameters. Named outputs of previous steps may be referenced as '${{ \\.outputs.\\ }}'" + bioimageio_description="A list of input parameters. Named outputs of previous steps may be referenced here as '${{ \\.outputs.\\ }}'." "\n\nIf not set, the outputs of the previous step are used as inputs.", ) + options = fields.YamlDict( + fields.String(validate=field_validators.Predicate("isidentifier")), + fields.Raw(), + bioimageio_description="Named options. Named outputs of previous steps may be referenced here as '${{ \\.outputs.\\ }}'.", + ) + outputs = fields.List( fields.String( validate=field_validators.Predicate("isidentifier"), @@ -232,7 +238,9 @@ def verify_param_list(params: typing.Any) -> typing.List[typing.Union[raw_nodes. 
return params @staticmethod - def check_for_duplicate_param_names(params: typing.List[typing.Union[raw_nodes.Parameter]], param_name: str): + def check_for_duplicate_param_names( + params: typing.List[typing.Union[raw_nodes.Parameter]], param_name: str, field_name=SCHEMA + ): names = set() for t in params: if not isinstance(t, raw_nodes.Parameter): @@ -241,7 +249,7 @@ def check_for_duplicate_param_names(params: typing.List[typing.Union[raw_nodes.P ) if t.name in names: - raise ValidationError(f"Duplicate {param_name} name '{t.name}' not allowed.") + raise ValidationError(f"Duplicate {param_name} name '{t.name}' not allowed.", field_name) names.add(t.name) @@ -258,7 +266,7 @@ def no_duplicate_input_and_option_names(self, data, **kwargs): ipts = data.get("inputs", []) opts = data.get("options", []) if isinstance(ipts, list) and isinstance(opts, list): - self.check_for_duplicate_param_names(self.verify_param_list(ipts + opts), "input/option") + self.check_for_duplicate_param_names(self.verify_param_list(ipts + opts), "input/option", "inputs/options") outputs = fields.List( fields.Nested(Output()), @@ -291,6 +299,13 @@ def get_initial_reference_names(data) -> typing.Set[str]: return refs + steps = fields.List( + fields.Nested(Step()), + validate=field_validators.Length(min=1), + required=True, + bioimageio_description="Workflow steps---a series of operators---to be executed consecutively.", + ) + test_steps = fields.List( fields.Nested(Step()), validate=field_validators.Length(min=1), @@ -309,7 +324,7 @@ def step_inputs_and_options_are_valid(self, data, **kwargs): for step in steps: if isinstance(step.inputs, list): for si in step.inputs: - if isinstance(si, str) and si.startswith("${{") and si.endwith("}}") and si not in references: + if isinstance(si, str) and si.startswith("${{") and si.endswith("}}") and si not in references: raise ValidationError(f"Invalid reference '{si}'") if step.options: diff --git a/example_specs/workflows/dummy/rdf.yaml b/example_specs/workflows/dummy/rdf.yaml index 86e3e887c..463ae35db 100644 --- a/example_specs/workflows/dummy/rdf.yaml +++ b/example_specs/workflows/dummy/rdf.yaml @@ -6,8 +6,7 @@ type: workflow inputs: - name: shape type: list -- name: axes - type: str + description: two integer to describe a 2d shape options: - name: threshold @@ -17,41 +16,39 @@ options: outputs: - name: nonsense type: tensor + axes: + - name: fantasy time + type: time + - name: x + type: space steps: - id: step0 op: generate_random_uniform_tensor - inputs: - shape: ${{ inputs.shape }} - axes: ${{ inputs.axes }} + inputs: ["${{ self.inputs.shape }}", ['fantasy time', 'x']] # options: # low: 0 # high: 1 + outputs: [tensor] - id: step1 op: binarize - inputs: - threshold: ${{ inputs.threshold }} + inputs: ["${{ step0.outputs.tensor }}", "${{ self.options.threshold }}"] - op: log options: log_level: 50 - threshold: ${{ kwargs.threshold }} - original: ${{ step0.outputs.random_tensor }} -# binarized: ${{ step1.outputs.binarized }} + threshold: "${{ self.options.threshold }}" + original: "${{ step0.outputs.tensor }}" +# binarized: "{{ step1.outputs.binarized }}" # given implicitly as arg already test_steps: - id: wf op: run_workflow - inputs: - rdf_source: ${{ self.rdf_source }} + inputs: ["${{ self.rdf_source }}"] options: - inputs: - shape: [2, 3] - axes: cx + inputs: [&test-shape [2, 3]] options: threshold: 0.5 outputs: [out0] - op: assert_shape - inputs: - tensor: ${{ wf.outputs.out0 }} - shape: ${{ inputs.shape }} + inputs: ["${{ wf.outputs.out0 }}", *test-shape] diff --git 
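A standalone sketch of the duplicate-name rule enforced by check_for_duplicate_param_names above (illustrative only; the schema reports the failure as a marshmallow ValidationError attached to a field name):

    from typing import Iterable

    def check_unique_names(names: Iterable[str], kind: str = "input/option") -> None:
        # The first repeated name aborts validation, mirroring the schema method.
        seen = set()
        for name in names:
            if name in seen:
                raise ValueError(f"Duplicate {kind} name {name!r} not allowed.")
            seen.add(name)

    check_unique_names(["shape", "threshold"])  # passes
    # check_unique_names(["shape", "shape"])    # would raise ValueError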
a/example_specs/workflows/hpa/single_cell_classification.yaml b/example_specs/workflows/hpa/single_cell_classification.yaml index 5eb10495c..b61cb247f 100644 --- a/example_specs/workflows/hpa/single_cell_classification.yaml +++ b/example_specs/workflows/hpa/single_cell_classification.yaml @@ -6,10 +6,26 @@ type: workflow inputs: - name: nuclei type: tensor + axes: + - type: batch + - name: gray scale + type: channel + - name: x + type: space + - name: y + type: space - name: protein type: tensor + axes: + - type: batch + - name: gray scale + type: channel + - name: x + type: space + - name: y + type: space -kwargs: +options: - name: seg_prep type: boolean default: false @@ -18,42 +34,57 @@ kwargs: outputs: - name: cells type: tensor + axes: + - type: batch + - name: gray scale + type: channel + - name: x + type: space + - name: y + type: space - name: scores type: tensor - + axes: + - type: batch + - name: gray scale + type: channel + - name: x + type: space + - name: y + type: space steps: - id: segmentation op: model_inference inputs: [inputs.nuclei] # take the first output of step 1 (id: data) as the only input - outputs: [cells] - kwargs: + options: rdf_source: conscientious-seashell - preprocessing: ${{ kwargs.seg_prep }} + preprocessing: ${{ self.options.seg_prep }} postprocessing: false + outputs: [cells] - id: classification op: model_inference inputs: [inputs.protein, segmentation.outputs.cells] # take the second output of step1 and the output of step 2 - outputs: [scores] - kwargs: + options: rdf_source: straightforward-crocodile preprocessing: true postprocessing: false + outputs: [scores] - op: select_outputs inputs: [segmentation.outputs.cells, classification.outputs.scores] test_steps: -- id: test_tensors - op: load_tensors - outputs: [nuclei, protein, cells, scores] - kwargs: +- op: load_tensors + id: test_tensors + options: sources: [nuclei.npy, protein.npy, cells.npy, scores.npy] -- id: workflow - op: run_workflow + outputs: [nuclei, protein, cells, scores] +- op: run_workflow + id: workflow inputs: [test_tensors.outputs.nuclei, test_tensors.outputs.protein] - outputs: [cells, scores] - kwargs: + options: rdf_source: ${{ self.rdf_source }} + outputs: [cells, scores] - op: assert_close inputs: [test_tensors.outputs.cells, workflow.outputs.cells] - op: assert_close diff --git a/example_specs/workflows/stardist/stardist_example.yaml b/example_specs/workflows/stardist/stardist_example.yaml index 9adef7fb5..7cb840de5 100644 --- a/example_specs/workflows/stardist/stardist_example.yaml +++ b/example_specs/workflows/stardist/stardist_example.yaml @@ -7,64 +7,88 @@ inputs: - name: raw type: tensor description: image with star-convex objects + axes: + - type: batch + - name: gray scale + type: channel + - name: x + type: space + - name: y + type: space + +options: +- name: diameter + type: float + default: 2.3 outputs: - name: labels type: tensor + axes: + - type: batch + - name: label id + type: channel + - name: x + type: space + - name: y + type: space - name: coord - type: tensor + type: list - name: points - type: tensor + type: list - name: prob type: tensor - -kwargs: -- name: diameter - type: float - default: 2.3 + axes: + - type: batch + - name: probability + type: channel + - name: x + type: space + - name: y + type: space steps: - op: zero_mean_unit_variance - op: model_inference - kwargs: + options: rdf_source: fearless-crab preprocessing: false # disable the preprocessing postprocessing: false # disable the postprocessing - op: stardist_postprocessing - kwargs: - 
diameter: ${{ kwargs.diameter }} + options: + diameter: "${{ self.options.diameter }}" test_steps: - id: test_tensors op: load_tensors - outputs: - - raw - - labels - - coord - - points - - prob - kwargs: + options: sources: - raw.npy - labels.npy - coord.npy - points.npy - prob.npy + outputs: + - raw + - labels + - coord + - points + - prob - id: workflow op: run_workflow - inputs: [test_tensors.outputs.raw] + inputs: [ "${{ test_tensors.outputs.raw }}" ] + options: + rdf_source: "${{ self.rdf_source }}" outputs: - labels - coord - points - prob - kwargs: - rdf_source: ${{ self.rdf_source }} - op: assert_close - inputs: [test_tensors.outputs.labels, workflow.outputs.labels] + inputs: ["${{ test_tensors.outputs.labels }}", "${{ workflow.outputs.labels }}"] - op: assert_close - inputs: [test_tensors.outputs.coord, workflow.outputs.coord] + inputs: ["${{ test_tensors.outputs.coord }}", "${{ workflow.outputs.coord }}"] - op: assert_close - inputs: [test_tensors.outputs.points, workflow.outputs.points] + inputs: ["${{ test_tensors.outputs.points }}", "${{ workflow.outputs.points }}"] - op: assert_close - inputs: [test_tensors.outputs.prob, workflow.outputs.prob] + inputs: ["${{ test_tensors.outputs.prob }}", "${{ workflow.outputs.prob }}"] diff --git a/tests/test_workflow_rdf.py b/tests/test_workflow_rdf.py index 3a2625e1d..9a0522890 100644 --- a/tests/test_workflow_rdf.py +++ b/tests/test_workflow_rdf.py @@ -9,7 +9,6 @@ def test_workflow_rdf_stardist_example(stardist_workflow_rdf): from bioimageio.spec.workflow.schema import Workflow data = yaml.load(stardist_workflow_rdf) - # data["root_path"] = stardist_workflow_rdf.parent workflow = Workflow().load(data) assert isinstance(workflow, raw_nodes.Workflow) @@ -20,7 +19,6 @@ def test_workflow_rdf_hpa_example(hpa_workflow_rdf): from bioimageio.spec.workflow.schema import Workflow data = yaml.load(hpa_workflow_rdf) - # data["root_path"] = hpa_workflow_rdf.parent workflow = Workflow().load(data) assert isinstance(workflow, raw_nodes.Workflow) @@ -40,9 +38,9 @@ def test_invalid_kwarg_name_duplicate(dummy_workflow_rdf): from bioimageio.spec.workflow.schema import Workflow data = yaml.load(dummy_workflow_rdf) - data["kwargs"].append(data["kwargs"][0]) + data["options"].append(data["options"][0]) with pytest.raises(ValidationError) as e: Workflow().load(data) - assert e.value.messages == {"kwargs": ["Duplicate kwarg name 'shape'."]} + assert e.value.messages == {"inputs/options": ["Duplicate input/option name 'threshold' not allowed."]} From e3d963e16bcfafe42f8dd2cb0f568e2df134d08d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 4 Nov 2022 12:58:33 +0100 Subject: [PATCH 17/40] inputs/options/outputs -> *_spec --- bioimageio/spec/workflow/v0_2/raw_nodes.py | 14 +++--- bioimageio/spec/workflow/v0_2/schema.py | 50 ++++++++++--------- example_specs/workflows/dummy/rdf.yaml | 6 +-- .../hpa/single_cell_classification.yaml | 6 +-- .../workflows/stardist/stardist_example.yaml | 6 +-- tests/test_workflow_rdf.py | 6 +-- 6 files changed, 45 insertions(+), 43 deletions(-) diff --git a/bioimageio/spec/workflow/v0_2/raw_nodes.py b/bioimageio/spec/workflow/v0_2/raw_nodes.py index 8bbcc813b..64095a712 100644 --- a/bioimageio/spec/workflow/v0_2/raw_nodes.py +++ b/bioimageio/spec/workflow/v0_2/raw_nodes.py @@ -133,7 +133,7 @@ class TimeAxis(Axis): @dataclass -class Parameter(RawNode): +class ParameterSpec(RawNode): name: str = missing type: ParameterType = missing description: Union[_Missing, str] = missing @@ -141,17 +141,17 @@ class Parameter(RawNode): @dataclass 
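The tests above hint at the intended loading pattern for these example workflow RDFs. A hedged, self-contained variant is sketched below; the file path and the use of ruamel.yaml are assumptions (the test suite goes through fixtures and the project's shared yaml helper, and imports the version-less alias bioimageio.spec.workflow.schema instead).

    from pathlib import Path

    from ruamel.yaml import YAML

    from bioimageio.spec.workflow.v0_2 import raw_nodes, schema

    data = YAML(typ="safe").load(Path("example_specs/workflows/dummy/rdf.yaml"))
    workflow = schema.Workflow().load(data)  # marshmallow deserialization into raw nodes
    assert isinstance(workflow, raw_nodes.Workflow)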
-class Input(Parameter): +class InputSpec(ParameterSpec): pass @dataclass -class Option(Parameter): +class OptionSpec(ParameterSpec): default: Union[_Missing, DefaultType] = missing @dataclass -class Output(Parameter): +class OutputSpec(ParameterSpec): pass @@ -168,8 +168,8 @@ class Step(RawNode): class Workflow(_RDF): type: Literal["workflow"] = missing - inputs: List[Input] = missing - options: List[Option] = missing - outputs: List[Output] = missing + inputs_spec: List[InputSpec] = missing + options_spec: List[OptionSpec] = missing + outputs_spec: List[OutputSpec] = missing steps: List[Step] = missing test_steps: List[Step] = missing diff --git a/bioimageio/spec/workflow/v0_2/schema.py b/bioimageio/spec/workflow/v0_2/schema.py index 065c9a2ae..a782c441b 100644 --- a/bioimageio/spec/workflow/v0_2/schema.py +++ b/bioimageio/spec/workflow/v0_2/schema.py @@ -107,7 +107,7 @@ def recommend_unit(self, value: str): self.warn("unit", f"unknown time unit {value}. Recommend units are: {recommended_units}") -class Parameter(_BioImageIOSchema): +class ParameterSpec(_BioImageIOSchema): name = fields.String( required=True, bioimageio_description="Parameter name. No duplicates are allowed.", @@ -143,11 +143,11 @@ def has_axes_if_tensor(self, data, **kwargs): raise ValidationError("'axes' required for input type 'tensor'.") -class Input(Parameter): +class InputSpec(ParameterSpec): pass -class Option(Parameter): +class OptionSpec(ParameterSpec): default = fields.Raw( required=True, bioimageio_description="Default value compatible with type given by `type` field." @@ -173,7 +173,7 @@ def default_has_compatible_type(self, data, **kwargs): ) -class Output(Parameter): +class OutputSpec(ParameterSpec): pass @@ -224,26 +224,26 @@ class Workflow(_BioImageIOSchema, RDF): _optional*_ with an asterisk indicates the field is optional depending on the value in another field. """ - inputs = fields.List( - fields.Nested(Input()), + inputs_spec = fields.List( + fields.Nested(InputSpec()), required=True, bioimageio_description="Describes the inputs expected by this workflow.", ) @staticmethod - def verify_param_list(params: typing.Any) -> typing.List[typing.Union[raw_nodes.Parameter]]: - if not isinstance(params, list) or not all(isinstance(v, raw_nodes.Parameter) for v in params): + def verify_param_list(params: typing.Any) -> typing.List[typing.Union[raw_nodes.ParameterSpec]]: + if not isinstance(params, list) or not all(isinstance(v, raw_nodes.ParameterSpec) for v in params): raise ValidationError("Could not check for duplicate parameter names due to another validation error.") return params @staticmethod def check_for_duplicate_param_names( - params: typing.List[typing.Union[raw_nodes.Parameter]], param_name: str, field_name=SCHEMA + params: typing.List[typing.Union[raw_nodes.ParameterSpec]], param_name: str, field_name=SCHEMA ): names = set() for t in params: - if not isinstance(t, raw_nodes.Parameter): + if not isinstance(t, raw_nodes.ParameterSpec): raise ValidationError( f"Could not check for duplicate {param_name} name due to other validation errors." 
) @@ -253,8 +253,8 @@ def check_for_duplicate_param_names( names.add(t.name) - options = fields.List( - fields.Nested(Option()), + options_spec = fields.List( + fields.Nested(OptionSpec()), required=True, bioimageio_description="Describes the options that may be given to this workflow.", ) @@ -263,38 +263,40 @@ def check_for_duplicate_param_names( def no_duplicate_input_and_option_names(self, data, **kwargs): if not isinstance(data, dict): return - ipts = data.get("inputs", []) - opts = data.get("options", []) + ipts = data.get("inputs_spec", []) + opts = data.get("options_spec", []) if isinstance(ipts, list) and isinstance(opts, list): - self.check_for_duplicate_param_names(self.verify_param_list(ipts + opts), "input/option", "inputs/options") + self.check_for_duplicate_param_names( + self.verify_param_list(ipts + opts), "input/option", "inputs_spec/options_spec" + ) - outputs = fields.List( - fields.Nested(Output()), + outputs_spec = fields.List( + fields.Nested(OutputSpec()), validate=field_validators.Length(min=1), bioimageio_description="Describes the outputs of this workflow.", ) - @validates("outputs") - def no_duplicate_output_names(self, outs: typing.List[raw_nodes.Output]): - self.check_for_duplicate_param_names(self.verify_param_list(outs), "output") + @validates("outputs_spec") + def no_duplicate_output_names(self, outs: typing.List[raw_nodes.OutputSpec]): + self.check_for_duplicate_param_names(self.verify_param_list(outs), "output_spec") @staticmethod def get_initial_reference_names(data) -> typing.Set[str]: refs = {"${{ self.rdf_source }}"} - inputs = data.get("inputs") + inputs = data.get("inputs_spec") if not isinstance(inputs, list): return refs for ipt in inputs: - if isinstance(ipt, raw_nodes.Input): + if isinstance(ipt, raw_nodes.InputSpec): refs.add(f"${{{{ self.inputs.{ipt.name} }}}}") - options = data.get("options") + options = data.get("options_spec") if not isinstance(options, list): return refs for opt in options: - if isinstance(opt, raw_nodes.Option): + if isinstance(opt, raw_nodes.OptionSpec): refs.add(f"${{{{ self.options.{opt.name} }}}}") return refs diff --git a/example_specs/workflows/dummy/rdf.yaml b/example_specs/workflows/dummy/rdf.yaml index 463ae35db..42d2bb434 100644 --- a/example_specs/workflows/dummy/rdf.yaml +++ b/example_specs/workflows/dummy/rdf.yaml @@ -3,17 +3,17 @@ description: A workflow to produce some nonsense format_version: 0.2.3 type: workflow -inputs: +inputs_spec: - name: shape type: list description: two integer to describe a 2d shape -options: +options_spec: - name: threshold type: float default: 0.5 -outputs: +outputs_spec: - name: nonsense type: tensor axes: diff --git a/example_specs/workflows/hpa/single_cell_classification.yaml b/example_specs/workflows/hpa/single_cell_classification.yaml index b61cb247f..f3e9773c1 100644 --- a/example_specs/workflows/hpa/single_cell_classification.yaml +++ b/example_specs/workflows/hpa/single_cell_classification.yaml @@ -3,7 +3,7 @@ description: A workflow for running HPA single-cell classification format_version: 0.2.3 type: workflow -inputs: +inputs_spec: - name: nuclei type: tensor axes: @@ -25,13 +25,13 @@ inputs: - name: y type: space -options: +options_spec: - name: seg_prep type: boolean default: false -outputs: +outputs_spec: - name: cells type: tensor axes: diff --git a/example_specs/workflows/stardist/stardist_example.yaml b/example_specs/workflows/stardist/stardist_example.yaml index 7cb840de5..46a2465e5 100644 --- a/example_specs/workflows/stardist/stardist_example.yaml +++ 
b/example_specs/workflows/stardist/stardist_example.yaml @@ -3,7 +3,7 @@ description: A workflow for running stardist format_version: 0.2.3 type: workflow -inputs: +inputs_spec: - name: raw type: tensor description: image with star-convex objects @@ -16,12 +16,12 @@ inputs: - name: y type: space -options: +options_spec: - name: diameter type: float default: 2.3 -outputs: +outputs_spec: - name: labels type: tensor axes: diff --git a/tests/test_workflow_rdf.py b/tests/test_workflow_rdf.py index 9a0522890..b55f3ddde 100644 --- a/tests/test_workflow_rdf.py +++ b/tests/test_workflow_rdf.py @@ -22,7 +22,7 @@ def test_workflow_rdf_hpa_example(hpa_workflow_rdf): workflow = Workflow().load(data) assert isinstance(workflow, raw_nodes.Workflow) - assert workflow.outputs[0].name == "cells" + assert workflow.outputs_spec[0].name == "cells" def test_dummy_workflow_rdf(dummy_workflow_rdf): @@ -38,9 +38,9 @@ def test_invalid_kwarg_name_duplicate(dummy_workflow_rdf): from bioimageio.spec.workflow.schema import Workflow data = yaml.load(dummy_workflow_rdf) - data["options"].append(data["options"][0]) + data["options_spec"].append(data["options_spec"][0]) with pytest.raises(ValidationError) as e: Workflow().load(data) - assert e.value.messages == {"inputs/options": ["Duplicate input/option name 'threshold' not allowed."]} + assert e.value.messages == {"inputs_spec/options_spec": ["Duplicate input/option name 'threshold' not allowed."]} From cd4bd4c5be1a9dad58554e3f5678211230e82006 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 4 Nov 2022 14:07:15 +0100 Subject: [PATCH 18/40] enforce unique step ids --- bioimageio/spec/workflow/v0_2/schema.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/bioimageio/spec/workflow/v0_2/schema.py b/bioimageio/spec/workflow/v0_2/schema.py index a782c441b..37798825d 100644 --- a/bioimageio/spec/workflow/v0_2/schema.py +++ b/bioimageio/spec/workflow/v0_2/schema.py @@ -1,6 +1,6 @@ import typing -from marshmallow import ValidationError, validates, validates_schema +from marshmallow import ValidationError, missing, validates, validates_schema from marshmallow.exceptions import SCHEMA from bioimageio.spec.rdf.v0_2.schema import RDF @@ -315,6 +315,28 @@ def get_initial_reference_names(data) -> typing.Set[str]: bioimageio_description="Test steps to be executed consecutively.", ) + @staticmethod + def unique_step_ids_impl(steps: typing.List[raw_nodes.Step], field_name: str): + if not steps or not isinstance(steps, list) or not all(isinstance(s, raw_nodes.Step) for s in steps): + raise ValidationError("Could not check for unique step ids due to other validation errors.", field_name) + + ids = set() + for step in steps: + if step.id is missing: + continue + if step.id in ids: + raise ValidationError(f"Duplicated step id {step.id}", field_name) + + ids.add(step.id) + + @validates("steps") + def unique_step_ids(self, value): + self.unique_step_ids_impl(value, "steps") + + @validates("test_steps") + def unique_test_step_ids(self, value): + self.unique_step_ids_impl(value, "test_steps") + @validates_schema def step_inputs_and_options_are_valid(self, data, **kwargs): for step_type in ["steps", "test_steps"]: From d894da9324fa84026d71c473f7933765cbb2d95c Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 4 Nov 2022 23:11:31 +0100 Subject: [PATCH 19/40] detect type workflow --- bioimageio/spec/shared/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/spec/shared/common.py b/bioimageio/spec/shared/common.py index 
362492388..b2530df81 100644
--- a/bioimageio/spec/shared/common.py
+++ b/bioimageio/spec/shared/common.py
@@ -162,7 +162,7 @@ def get_patched_format_version(type_: str, format_version: str):
 
 
 def get_spec_type_from_type(type_: Optional[str]):
-    if type_ in ("model", "collection", "dataset"):
+    if type_ in ("model", "collection", "dataset", "workflow"):
         return type_
     else:
         return "rdf"

From 7ace197a881bbec3edf9463f72991e914bd6b99c Mon Sep 17 00:00:00 2001
From: fynnbe
Date: Fri, 4 Nov 2022 23:12:16 +0100
Subject: [PATCH 20/40] don't accept empty strings

---
 bioimageio/spec/workflow/v0_2/schema.py | 14 +++++++-------
 example_specs/workflows/dummy/rdf.yaml  |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/bioimageio/spec/workflow/v0_2/schema.py b/bioimageio/spec/workflow/v0_2/schema.py
index 37798825d..ec2bbb103 100644
--- a/bioimageio/spec/workflow/v0_2/schema.py
+++ b/bioimageio/spec/workflow/v0_2/schema.py
@@ -22,7 +22,7 @@ class Axis(_BioImageIOSchema):
     name = fields.String(
         required=True,
         bioimageio_description="A unique axis name (max 32 characters).",
-        validate=field_validators.Length(max=32),
+        validate=field_validators.Length(min=1, max=32),
     )
     type = fields.String(
         required=True,
@@ -30,7 +30,7 @@ class Axis(_BioImageIOSchema):
         bioimageio_description=f"One of: {get_args(raw_nodes.AxisType)}",
     )
     description = fields.String(
-        validate=field_validators.Length(max=128),
+        validate=field_validators.Length(min=1, max=128),
         bioimageio_description="Description of axis (max 128 characters).",
     )
     unit = fields.String(bioimageio_description="Physical unit of this axis.", bioimageio_maybe_required=True)
@@ -59,16 +59,16 @@ class Meta:
     type = fields.String(required=True, validate=field_validators.Equal("channel"), bioimageio_description="'channel'")
     name = fields.Union(
         [
-            fields.List(fields.String(validate=field_validators.Length(max=32))),
-            fields.String(validate=field_validators.Length(max=32)),
+            fields.List(fields.String(validate=field_validators.Length(min=1, max=32))),
+            fields.String(validate=field_validators.Length(min=1, max=32)),
         ],
         required=True,
         bioimageio_description="A unique axis name (max 32 characters; per channel if list).",
     )
     unit = fields.Union(
         [
-            fields.List(fields.String(validate=field_validators.Length(max=32))),
-            fields.String(validate=field_validators.Length(max=32)),
+            fields.List(fields.String(validate=field_validators.Length(min=1, max=32))),
+            fields.String(validate=field_validators.Length(min=1, max=32)),
         ],
         required=False,
         bioimageio_description="Physical unit of data values (max 32 characters; per channel if list).",
     )
@@ -132,7 +132,7 @@ class ParameterSpec(_BioImageIOSchema):
         bioimageio_description="Axis specifications (only required for type 'tensor').",
     )
     description = fields.String(
-        bioimageio_description="Description (max 128 characters).", validate=field_validators.Length(max=128)
+        bioimageio_description="Description (max 128 characters).", validate=field_validators.Length(min=1, max=128)
     )
 
     @validates_schema
diff --git a/example_specs/workflows/dummy/rdf.yaml b/example_specs/workflows/dummy/rdf.yaml
index 42d2bb434..be92b8770 100644
--- a/example_specs/workflows/dummy/rdf.yaml
+++ b/example_specs/workflows/dummy/rdf.yaml
@@ -6,7 +6,7 @@ type: workflow
 inputs_spec:
 - name: shape
   type: list
-  description: two integer to describe a 2d shape
+  description: two integers to describe a 2d shape
 
 options_spec:
 - name: threshold

From f5af22f35f5e458305d3b9a9347cfcf0b54f8f5e Mon Sep 17 00:00:00 2001
From: fynnbe
Date: Sat, 5 Nov 2022 00:29:16
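The one-line change to get_spec_type_from_type above simply adds 'workflow' to the recognized resource types; anything unknown still falls back to the generic 'rdf' spec. A tiny standalone sketch of that dispatch (reimplemented here for illustration only):

    from typing import Optional

    def get_spec_type_from_type(type_: Optional[str]) -> str:
        # mirrors bioimageio.spec.shared.common after this patch
        if type_ in ("model", "collection", "dataset", "workflow"):
            return type_
        else:
            return "rdf"

    assert get_spec_type_from_type("workflow") == "workflow"
    assert get_spec_type_from_type("application") == "rdf"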
+0100 Subject: [PATCH 21/40] also log binarized --- example_specs/workflows/dummy/rdf.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/example_specs/workflows/dummy/rdf.yaml b/example_specs/workflows/dummy/rdf.yaml index be92b8770..ff3b9c7fc 100644 --- a/example_specs/workflows/dummy/rdf.yaml +++ b/example_specs/workflows/dummy/rdf.yaml @@ -33,12 +33,13 @@ steps: - id: step1 op: binarize inputs: ["${{ step0.outputs.tensor }}", "${{ self.options.threshold }}"] + outputs: [binarized] - op: log options: log_level: 50 threshold: "${{ self.options.threshold }}" original: "${{ step0.outputs.tensor }}" -# binarized: "{{ step1.outputs.binarized }}" # given implicitly as arg already + binarized: "${{ step1.outputs.binarized }}" test_steps: From 9fe4ca20e59f01b1e71ac4b7e0a46be05a2c86c8 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 24 Nov 2022 10:00:00 +0100 Subject: [PATCH 22/40] wip remove wf steps --- bioimageio/spec/workflow/v0_2/raw_nodes.py | 18 ++-------- bioimageio/spec/workflow/v0_2/schema.py | 38 ---------------------- 2 files changed, 3 insertions(+), 53 deletions(-) diff --git a/bioimageio/spec/workflow/v0_2/raw_nodes.py b/bioimageio/spec/workflow/v0_2/raw_nodes.py index 64095a712..f68d839eb 100644 --- a/bioimageio/spec/workflow/v0_2/raw_nodes.py +++ b/bioimageio/spec/workflow/v0_2/raw_nodes.py @@ -1,18 +1,16 @@ -""" raw nodes for the dataset RDF spec +""" raw nodes for the workflow RDF spec raw nodes are the deserialized equivalent to the content of any RDF. serialization and deserialization are defined in schema: RDF <--schema--> raw nodes """ -import typing from dataclasses import dataclass -from pathlib import Path from typing import Any, Dict, List, Union from marshmallow import missing from marshmallow.utils import _Missing -from bioimageio.spec.rdf.v0_2.raw_nodes import FormatVersion, RDF as _RDF, URI +from bioimageio.spec.rdf.v0_2.raw_nodes import FormatVersion, RDF as _RDF from bioimageio.spec.shared.raw_nodes import RawNode try: @@ -21,7 +19,6 @@ from typing_extensions import Literal, get_args # type: ignore FormatVersion = FormatVersion -ParameterType = Literal["tensor", "int", "float", "string", "boolean", "list", "dict", "any"] DefaultType = Union[int, float, str, bool, list, dict, None] TYPE_NAME_MAP = {int: "int", float: "float", str: "string", bool: "boolean", list: "list", dict: "dict", None: "null"} @@ -135,7 +132,7 @@ class TimeAxis(Axis): @dataclass class ParameterSpec(RawNode): name: str = missing - type: ParameterType = missing + type: str = missing description: Union[_Missing, str] = missing axes: Union[_Missing, List[Axis]] = missing @@ -155,15 +152,6 @@ class OutputSpec(ParameterSpec): pass -@dataclass -class Step(RawNode): - op: str = missing - id: Union[_Missing, str] = missing - inputs: Union[_Missing, List[Any]] = missing - options: Union[_Missing, Dict[str, Any]] = missing - outputs: Union[_Missing, List[str]] = missing - - @dataclass class Workflow(_RDF): type: Literal["workflow"] = missing diff --git a/bioimageio/spec/workflow/v0_2/schema.py b/bioimageio/spec/workflow/v0_2/schema.py index ec2bbb103..380faab08 100644 --- a/bioimageio/spec/workflow/v0_2/schema.py +++ b/bioimageio/spec/workflow/v0_2/schema.py @@ -177,44 +177,6 @@ class OutputSpec(ParameterSpec): pass -class Step(_BioImageIOSchema): - op = fields.String( - required=True, - validate=field_validators.Predicate("isidentifier"), - bioimageio_description="Name of operation. 
Must be implemented in bioimageio.core or bioimageio.contrib.", - ) - id = fields.String( - required=False, - bioimageio_maybe_required=True, - validate=[field_validators.Predicate("isidentifier"), field_validators.NoneOf(["self"])], - bioimageio_description="Step id for referencing the steps' outputs (must not be 'self').", - ) - - @validates_schema - def has_id_if_outputs(self, data, **kwargs): - if data.get("outputs") and "id" not in data: - raise ValidationError("'id' required if 'outputs' are named.") - - inputs = fields.List( - fields.Raw(), - bioimageio_description="A list of input parameters. Named outputs of previous steps may be referenced here as '${{ \\.outputs.\\ }}'." - "\n\nIf not set, the outputs of the previous step are used as inputs.", - ) - options = fields.YamlDict( - fields.String(validate=field_validators.Predicate("isidentifier")), - fields.Raw(), - bioimageio_description="Named options. Named outputs of previous steps may be referenced here as '${{ \\.outputs.\\ }}'.", - ) - - outputs = fields.List( - fields.String( - validate=field_validators.Predicate("isidentifier"), - ), - required=False, - bioimageio_description="Output names for this step for later referencing.", - ) - - class Workflow(_BioImageIOSchema, RDF): bioimageio_description = f"""# BioImage.IO Workflow Resource Description File {get_args(raw_nodes.FormatVersion)[-1]} This specification defines the fields used in a BioImage.IO-compliant resource description file (`RDF`) for describing workflows. From 31ecba9afe1b29b1a1d567f6d1f74b02403c80c6 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 24 Nov 2022 10:16:25 +0100 Subject: [PATCH 23/40] rename importable sources --- bioimageio/spec/model/v0_3/raw_nodes.py | 8 +++--- bioimageio/spec/model/v0_3/schema.py | 2 +- bioimageio/spec/model/v0_4/raw_nodes.py | 8 +++--- bioimageio/spec/model/v0_4/schema.py | 6 ++--- bioimageio/spec/shared/_resolve_source.py | 16 ++++++------ bioimageio/spec/shared/fields.py | 14 +++++----- bioimageio/spec/shared/node_transformer.py | 30 +++++++++++----------- bioimageio/spec/shared/raw_nodes.py | 14 +++++----- tests/test_raw_nodes_match_schema.py | 6 ++--- 9 files changed, 52 insertions(+), 52 deletions(-) diff --git a/bioimageio/spec/model/v0_3/raw_nodes.py b/bioimageio/spec/model/v0_3/raw_nodes.py index 864c9f654..c487aebb1 100644 --- a/bioimageio/spec/model/v0_3/raw_nodes.py +++ b/bioimageio/spec/model/v0_3/raw_nodes.py @@ -11,8 +11,8 @@ from bioimageio.spec.shared.raw_nodes import ( Dependencies, ImplicitOutputShape, - ImportableModule, - ImportableSourceFile, + CallableFromModule, + CallableFromSourceFile, ParametrizedInputShape, RawNode, URI, @@ -136,7 +136,7 @@ class TensorflowSavedModelBundleWeightsEntry(_WeightsEntryBase): TensorflowSavedModelBundleWeightsEntry, ] -ImportableSource = Union[ImportableSourceFile, ImportableModule] +CallableSource = Union[CallableFromSourceFile, CallableFromModule] @dataclass @@ -168,7 +168,7 @@ class Model(RDF): timestamp: datetime = missing type: Literal["model"] = missing - source: Union[_Missing, ImportableSource] = missing + source: Union[_Missing, CallableSource] = missing test_inputs: List[Union[URI, Path]] = missing test_outputs: List[Union[URI, Path]] = missing weights: Dict[WeightsFormat, WeightsEntry] = missing diff --git a/bioimageio/spec/model/v0_3/schema.py b/bioimageio/spec/model/v0_3/schema.py index e3c6c7ead..87bdf8545 100644 --- a/bioimageio/spec/model/v0_3/schema.py +++ b/bioimageio/spec/model/v0_3/schema.py @@ -600,7 +600,7 @@ class Meta: + " This field is only 
required if the field source is present.", ) - source = fields.ImportableSource( + source = fields.CallableSource( bioimageio_maybe_required=True, bioimageio_description="Language and framework specific implementation. As some weights contain the model " "architecture, the source is optional depending on the present weight formats. `source` can either point to a " diff --git a/bioimageio/spec/model/v0_4/raw_nodes.py b/bioimageio/spec/model/v0_4/raw_nodes.py index 87ae59810..db3474ba1 100644 --- a/bioimageio/spec/model/v0_4/raw_nodes.py +++ b/bioimageio/spec/model/v0_4/raw_nodes.py @@ -26,8 +26,8 @@ from bioimageio.spec.shared.raw_nodes import ( Dependencies, ImplicitOutputShape, - ImportableModule, - ImportableSourceFile, + CallableFromModule, + CallableFromSourceFile, ParametrizedInputShape, RawNode, URI, @@ -54,7 +54,7 @@ "pytorch_state_dict", "torchscript", "keras_hdf5", "tensorflow_js", "tensorflow_saved_model_bundle", "onnx" ] -ImportableSource = Union[ImportableSourceFile, ImportableModule] +CallableSource = Union[CallableFromSourceFile, CallableFromModule] @dataclass @@ -75,7 +75,7 @@ class OnnxWeightsEntry(_WeightsEntryBase, OnnxWeightsEntry03): @dataclass class PytorchStateDictWeightsEntry(_WeightsEntryBase): weights_format_name = "Pytorch State Dict" - architecture: ImportableSource = missing + architecture: CallableSource = missing architecture_sha256: Union[_Missing, str] = missing kwargs: Union[_Missing, Dict[str, Any]] = missing pytorch_version: Union[_Missing, packaging.version.Version] = missing diff --git a/bioimageio/spec/model/v0_4/schema.py b/bioimageio/spec/model/v0_4/schema.py index aee8dc357..2afd5e649 100644 --- a/bioimageio/spec/model/v0_4/schema.py +++ b/bioimageio/spec/model/v0_4/schema.py @@ -203,7 +203,7 @@ class OnnxWeightsEntry(OnnxWeightsEntry03, _WeightsEntryBase): class PytorchStateDictWeightsEntry(_WeightsEntryBase): bioimageio_description = "PyTorch state dictionary weights format" weights_format = fields.String(validate=field_validators.Equal("pytorch_state_dict"), required=True, load_only=True) - architecture = fields.ImportableSource( + architecture = fields.CallableSource( required=True, bioimageio_description="Source code of the model architecture that either points to a " "local implementation: `:` or the " @@ -225,9 +225,9 @@ class PytorchStateDictWeightsEntry(_WeightsEntryBase): @validates_schema def sha_for_source_code_file(self, data, **kwargs): arch = data.get("architecture") - if isinstance(arch, raw_nodes.ImportableModule): + if isinstance(arch, raw_nodes.CallableFromModule): return - elif isinstance(arch, raw_nodes.ImportableSourceFile): + elif isinstance(arch, raw_nodes.CallableFromSourceFile): sha = data.get("architecture_sha256") if sha is None: raise ValidationError( diff --git a/bioimageio/spec/shared/_resolve_source.py b/bioimageio/spec/shared/_resolve_source.py index a03e9ab13..56a0a8657 100644 --- a/bioimageio/spec/shared/_resolve_source.py +++ b/bioimageio/spec/shared/_resolve_source.py @@ -290,25 +290,25 @@ def _resolve_source_path( @resolve_source.register -def _resolve_source_resolved_importable_path( - source: raw_nodes.ResolvedImportableSourceFile, +def _resolve_source_resolved_callable_path( + source: raw_nodes.ResolvedCallableFromSourceFile, root_path: typing.Union[os.PathLike, URI] = pathlib.Path(), output: typing.Optional[os.PathLike] = None, pbar=None, -) -> raw_nodes.ResolvedImportableSourceFile: - return raw_nodes.ResolvedImportableSourceFile( +) -> raw_nodes.ResolvedCallableFromSourceFile: + return 
raw_nodes.ResolvedCallableFromSourceFile( callable_name=source.callable_name, source_file=resolve_source(source.source_file, root_path, output, pbar) ) @resolve_source.register -def _resolve_source_importable_path( - source: raw_nodes.ImportableSourceFile, +def _resolve_source_callable_path( + source: raw_nodes.CallableFromSourceFile, root_path: typing.Union[os.PathLike, URI] = pathlib.Path(), output: typing.Optional[os.PathLike] = None, pbar=None, -) -> raw_nodes.ResolvedImportableSourceFile: - return raw_nodes.ResolvedImportableSourceFile( +) -> raw_nodes.ResolvedCallableFromSourceFile: + return raw_nodes.ResolvedCallableFromSourceFile( callable_name=source.callable_name, source_file=resolve_source(source.source_file, root_path, output, pbar) ) diff --git a/bioimageio/spec/shared/fields.py b/bioimageio/spec/shared/fields.py index 3ef1e5a02..2d409fa66 100644 --- a/bioimageio/spec/shared/fields.py +++ b/bioimageio/spec/shared/fields.py @@ -323,7 +323,7 @@ def __init__(self, **super_kwargs): super().__init__(Integer(), **super_kwargs) -class ImportableSource(String): +class CallableSource(String): @staticmethod def _is_import(path): return ":" not in path @@ -342,15 +342,15 @@ def _deserialize(self, *args, **kwargs) -> typing.Any: if not module_name: raise ValidationError( - f"Missing module name in importable source: {source_str}. Is it just missing a dot?" + f"Missing module name in callable source: {source_str}." ) if not object_name: raise ValidationError( - f"Missing object/callable name in importable source: {source_str}. Is it just missing a dot?" + f"Missing object/callable name in callable source: {source_str}. Is it just missing a dot?" ) - return raw_nodes.ImportableModule(callable_name=object_name, module_name=module_name) + return raw_nodes.CallableFromModule(callable_name=object_name, module_name=module_name) elif self._is_filepath(source_str): *module_uri_parts, object_name = source_str.split(":") @@ -369,7 +369,7 @@ def _deserialize(self, *args, **kwargs) -> typing.Any: ), ] ) - return raw_nodes.ImportableSourceFile( + return raw_nodes.CallableFromSourceFile( callable_name=object_name, source_file=source_file_field.deserialize(module_uri) ) else: @@ -378,9 +378,9 @@ def _deserialize(self, *args, **kwargs) -> typing.Any: def _serialize(self, value, attr, obj, **kwargs) -> typing.Optional[str]: if value is None: return None - elif isinstance(value, raw_nodes.ImportableModule): + elif isinstance(value, raw_nodes.CallableFromModule): return f"{value.module_name}.{value.callable_name}" - elif isinstance(value, raw_nodes.ImportableSourceFile): + elif isinstance(value, raw_nodes.CallableFromSourceFile): return f"{value.source_file}:{value.callable_name}" else: raise TypeError(f"{value} has unexpected type {type(value)}") diff --git a/bioimageio/spec/shared/node_transformer.py b/bioimageio/spec/shared/node_transformer.py index ef10554d3..4f54a2d8e 100644 --- a/bioimageio/spec/shared/node_transformer.py +++ b/bioimageio/spec/shared/node_transformer.py @@ -235,14 +235,14 @@ def __init__(self, *, root: typing.Union[os.PathLike, URI]): else: self.root = pathlib.Path(root).resolve() - def transform_ImportableSourceFile( - self, node: raw_nodes.ImportableSourceFile, **kwargs - ) -> raw_nodes.ImportableSourceFile: + def transform_CallableFromSourceFile( + self, node: raw_nodes.CallableFromSourceFile, **kwargs + ) -> raw_nodes.CallableFromSourceFile: if isinstance(node.source_file, pathlib.Path) and node.source_file.is_absolute(): if not isinstance(self.root, pathlib.Path): raise 
TypeError(f"Cannot convert absolute path '{node.source_file}' with URI root '{self.root}'") sf = node.source_file.relative_to(self.root) - return raw_nodes.ImportableSourceFile(source_file=sf, callable_name=node.callable_name) + return raw_nodes.CallableFromSourceFile(source_file=sf, callable_name=node.callable_name) else: return node @@ -296,21 +296,21 @@ def transform_PosixPath(self, leaf: pathlib.PosixPath, **kwargs) -> typing.Union def transform_WindowsPath(self, leaf: pathlib.WindowsPath, **kwargs) -> typing.Union[URI, pathlib.Path]: return self._transform_Path(leaf) - def transform_ImportableSourceFile( - self, node: raw_nodes.ImportableSourceFile, **kwargs - ) -> raw_nodes.ImportableSourceFile: + def transform_CallableFromSourceFile( + self, node: raw_nodes.CallableFromSourceFile, **kwargs + ) -> raw_nodes.CallableFromSourceFile: if isinstance(node.source_file, URI): return node elif isinstance(node.source_file, pathlib.Path): if node.source_file.is_absolute(): return node else: - return raw_nodes.ImportableSourceFile( + return raw_nodes.CallableFromSourceFile( source_file=self.root / node.source_file, callable_name=node.callable_name ) else: raise TypeError( - f"Unexpected type '{type(node.source_file)}' for raw_nodes.ImportableSourceFile.source_file '{node.source_file}'" + f"Unexpected type '{type(node.source_file)}' for raw_nodes.CallableFromSourceFile.source_file '{node.source_file}'" ) @@ -333,13 +333,13 @@ def transform_URI( local_path = _resolve_source(node, root_path=self.root) return local_path - def transform_ImportableSourceFile( - self, node: raw_nodes.ImportableSourceFile, **kwargs - ) -> raw_nodes.ResolvedImportableSourceFile: - return raw_nodes.ResolvedImportableSourceFile( + def transform_CallableFromSourceFile( + self, node: raw_nodes.CallableFromSourceFile, **kwargs + ) -> raw_nodes.ResolvedCallableFromSourceFile: + return raw_nodes.ResolvedCallableFromSourceFile( source_file=_resolve_source(node.source_file, self.root), callable_name=node.callable_name ) - def transform_ImportableModule(self, node: raw_nodes.ImportableModule, **kwargs) -> raw_nodes.LocalImportableModule: + def transform_CallableFromModule(self, node: raw_nodes.CallableFromModule, **kwargs) -> raw_nodes.LocalCallableFromModule: r = self.root if isinstance(self.root, pathlib.Path) else pathlib.Path() - return raw_nodes.LocalImportableModule(**dataclasses.asdict(node), root_path=r) + return raw_nodes.LocalCallableFromModule(**dataclasses.asdict(node), root_path=r) diff --git a/bioimageio/spec/shared/raw_nodes.py b/bioimageio/spec/shared/raw_nodes.py index f6b708e61..39882b8fc 100644 --- a/bioimageio/spec/shared/raw_nodes.py +++ b/bioimageio/spec/shared/raw_nodes.py @@ -164,7 +164,7 @@ def __len__(self): @dataclass -class ImportableModule(RawNode): +class CallableFromModule(RawNode): module_name: str = missing callable_name: str = missing @@ -173,8 +173,8 @@ def __str__(self): @dataclass -class LocalImportableModule(ImportableModule): - """intermediate between raw_nodes.ImportableModule and core.resource_io.nodes.ImportedSource. +class LocalCallableFromModule(CallableFromModule): + """intermediate between raw_nodes.CallableFromModule and core.resource_io.nodes.ImportedCallable. 
Used by SourceNodeTransformer """ @@ -183,7 +183,7 @@ class LocalImportableModule(ImportableModule): @dataclass -class ImportableSourceFile(RawNode): +class CallableFromSourceFile(RawNode): _include_in_package = ("source_file",) callable_name: str = missing @@ -194,8 +194,8 @@ def __str__(self): @dataclass -class ResolvedImportableSourceFile(ImportableSourceFile): - """intermediate between raw_nodes.ImportableSourceFile and core.resource_io.nodes.ImportedSource. +class ResolvedCallableFromSourceFile(CallableFromSourceFile): + """intermediate between raw_nodes.CallableFromSourceFile and core.resource_io.nodes.ImportedCallable. Used by SourceNodeTransformer """ @@ -203,4 +203,4 @@ class ResolvedImportableSourceFile(ImportableSourceFile): source_file: pathlib.Path = missing -ImportableSource = Union[ImportableModule, ImportableSourceFile, ResolvedImportableSourceFile, LocalImportableModule] +CallableSource = Union[CallableFromModule, CallableFromSourceFile, ResolvedCallableFromSourceFile, LocalCallableFromModule] diff --git a/tests/test_raw_nodes_match_schema.py b/tests/test_raw_nodes_match_schema.py index 0c75265b9..5bd3387a6 100644 --- a/tests/test_raw_nodes_match_schema.py +++ b/tests/test_raw_nodes_match_schema.py @@ -35,9 +35,9 @@ def test_model_spec(schema_raw_nodes_pair): } assert field_names # did we get any? node_names -= field_names - # if present, ignore raw_nodes.ImportableModule and raw_nodes.ImportableSourceFile which are coming from - # fields.ImportableSource - node_names -= {n for n in {"ImportableModule", "ImportableSourceFile"} if hasattr(raw_nodes, n)} + # if present, ignore raw_nodes.CallableFromModule and raw_nodes.CallableFromSourceFile which are coming from + # fields.CallableSource + node_names -= {n for n in {"CallableFromModule", "CallableFromSourceFile"} if hasattr(raw_nodes, n)} assert node_names # did we get any? From ea1b82658a27370af050255e177570cad4af6208 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 24 Nov 2022 10:16:55 +0100 Subject: [PATCH 24/40] black --- bioimageio/spec/shared/fields.py | 4 +--- bioimageio/spec/shared/node_transformer.py | 4 +++- bioimageio/spec/shared/raw_nodes.py | 4 +++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/bioimageio/spec/shared/fields.py b/bioimageio/spec/shared/fields.py index 2d409fa66..7f07e6981 100644 --- a/bioimageio/spec/shared/fields.py +++ b/bioimageio/spec/shared/fields.py @@ -341,9 +341,7 @@ def _deserialize(self, *args, **kwargs) -> typing.Any: object_name = source_str[last_dot_idx + 1 :] if not module_name: - raise ValidationError( - f"Missing module name in callable source: {source_str}." 
- ) + raise ValidationError(f"Missing module name in callable source: {source_str}.") if not object_name: raise ValidationError( diff --git a/bioimageio/spec/shared/node_transformer.py b/bioimageio/spec/shared/node_transformer.py index 4f54a2d8e..b93cbbce8 100644 --- a/bioimageio/spec/shared/node_transformer.py +++ b/bioimageio/spec/shared/node_transformer.py @@ -340,6 +340,8 @@ def transform_CallableFromSourceFile( source_file=_resolve_source(node.source_file, self.root), callable_name=node.callable_name ) - def transform_CallableFromModule(self, node: raw_nodes.CallableFromModule, **kwargs) -> raw_nodes.LocalCallableFromModule: + def transform_CallableFromModule( + self, node: raw_nodes.CallableFromModule, **kwargs + ) -> raw_nodes.LocalCallableFromModule: r = self.root if isinstance(self.root, pathlib.Path) else pathlib.Path() return raw_nodes.LocalCallableFromModule(**dataclasses.asdict(node), root_path=r) diff --git a/bioimageio/spec/shared/raw_nodes.py b/bioimageio/spec/shared/raw_nodes.py index 39882b8fc..e7224f6e6 100644 --- a/bioimageio/spec/shared/raw_nodes.py +++ b/bioimageio/spec/shared/raw_nodes.py @@ -203,4 +203,6 @@ class ResolvedCallableFromSourceFile(CallableFromSourceFile): source_file: pathlib.Path = missing -CallableSource = Union[CallableFromModule, CallableFromSourceFile, ResolvedCallableFromSourceFile, LocalCallableFromModule] +CallableSource = Union[ + CallableFromModule, CallableFromSourceFile, ResolvedCallableFromSourceFile, LocalCallableFromModule +] From 283da9a836bb9420818de48d2d206c24ee0eb984 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 24 Nov 2022 10:24:25 +0100 Subject: [PATCH 25/40] update changelog --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index b7823c00e..45f53d45e 100644 --- a/README.md +++ b/README.md @@ -149,6 +149,12 @@ As a dependency it is included in [bioimageio.core](https://github.com/bioimage- ## Changelog #### bioimageio.spec tbd - make pre-/postprocessing kwargs `mode` and `axes` always optional for model RDF 0.3 and 0.4 +- rename + - `ImportableSource`→`CallableSource` + - `ImportableModule`→`CallableFromModule` + - `ImportableSourceFile`→`CallableFromSourceFile` + - `ResolvedImportableSourceFile`→`ResolvedCallableFromSourceFile` + - `LocalImportableModule`→`LocalCallableFromModule` #### bioimageio.spec 0.4.8post1 - add `axes` and `eps` to `scale_mean_var` From 73b31b9e2eafe985c810f705eedfaea89ddc4020 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 24 Nov 2022 11:05:24 +0100 Subject: [PATCH 26/40] remove steps from workflow spec --- bioimageio/spec/workflow/v0_2/raw_nodes.py | 7 ++- bioimageio/spec/workflow/v0_2/schema.py | 60 ---------------------- 2 files changed, 3 insertions(+), 64 deletions(-) diff --git a/bioimageio/spec/workflow/v0_2/raw_nodes.py b/bioimageio/spec/workflow/v0_2/raw_nodes.py index f68d839eb..967562d28 100644 --- a/bioimageio/spec/workflow/v0_2/raw_nodes.py +++ b/bioimageio/spec/workflow/v0_2/raw_nodes.py @@ -5,7 +5,7 @@ RDF <--schema--> raw nodes """ from dataclasses import dataclass -from typing import Any, Dict, List, Union +from typing import List, Union from marshmallow import missing from marshmallow.utils import _Missing @@ -20,6 +20,7 @@ FormatVersion = FormatVersion DefaultType = Union[int, float, str, bool, list, dict, None] +ParameterType = Literal["tensor", "int", "float", "string", "boolean", "list", "dict", "any"] TYPE_NAME_MAP = {int: "int", float: "float", str: "string", bool: "boolean", list: "list", dict: "dict", None: "null"} # unit names from 
https://ngff.openmicroscopy.org/latest/#axes-md @@ -132,7 +133,7 @@ class TimeAxis(Axis): @dataclass class ParameterSpec(RawNode): name: str = missing - type: str = missing + type: ParameterType = missing description: Union[_Missing, str] = missing axes: Union[_Missing, List[Axis]] = missing @@ -159,5 +160,3 @@ class Workflow(_RDF): inputs_spec: List[InputSpec] = missing options_spec: List[OptionSpec] = missing outputs_spec: List[OutputSpec] = missing - steps: List[Step] = missing - test_steps: List[Step] = missing diff --git a/bioimageio/spec/workflow/v0_2/schema.py b/bioimageio/spec/workflow/v0_2/schema.py index 380faab08..4f111f48d 100644 --- a/bioimageio/spec/workflow/v0_2/schema.py +++ b/bioimageio/spec/workflow/v0_2/schema.py @@ -262,63 +262,3 @@ def get_initial_reference_names(data) -> typing.Set[str]: refs.add(f"${{{{ self.options.{opt.name} }}}}") return refs - - steps = fields.List( - fields.Nested(Step()), - validate=field_validators.Length(min=1), - required=True, - bioimageio_description="Workflow steps---a series of operators---to be executed consecutively.", - ) - - test_steps = fields.List( - fields.Nested(Step()), - validate=field_validators.Length(min=1), - required=True, - bioimageio_description="Test steps to be executed consecutively.", - ) - - @staticmethod - def unique_step_ids_impl(steps: typing.List[raw_nodes.Step], field_name: str): - if not steps or not isinstance(steps, list) or not all(isinstance(s, raw_nodes.Step) for s in steps): - raise ValidationError("Could not check for unique step ids due to other validation errors.", field_name) - - ids = set() - for step in steps: - if step.id is missing: - continue - if step.id in ids: - raise ValidationError(f"Duplicated step id {step.id}", field_name) - - ids.add(step.id) - - @validates("steps") - def unique_step_ids(self, value): - self.unique_step_ids_impl(value, "steps") - - @validates("test_steps") - def unique_test_step_ids(self, value): - self.unique_step_ids_impl(value, "test_steps") - - @validates_schema - def step_inputs_and_options_are_valid(self, data, **kwargs): - for step_type in ["steps", "test_steps"]: - steps = data.get(step_type) - if not steps or not isinstance(steps, list) or not isinstance(steps[0], raw_nodes.Step): - raise ValidationError(f"Missing/invalid '{step_type}'") - - references = self.get_initial_reference_names(data) - for step in steps: - if isinstance(step.inputs, list): - for si in step.inputs: - if isinstance(si, str) and si.startswith("${{") and si.endswith("}}") and si not in references: - raise ValidationError(f"Invalid reference '{si}'") - - if step.options: - for k, v in step.options.items(): - if isinstance(v, str) and v.startswith("${{") and v.endswith("}}") and v not in references: - raise ValidationError( - f"Invalid {step_type[:-1].replace('_', ' ')} option ({k}) referencing '{v}'" - ) - - if step.outputs: - references.update({f"${{{{ {step.id}.outputs.{out} }}}}" for out in step.outputs}) From 9c4a81dd291e89948db5993c8fbdeaff05507224 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 24 Nov 2022 11:06:00 +0100 Subject: [PATCH 27/40] split up CallableSource field --- bioimageio/spec/shared/fields.py | 33 ++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/bioimageio/spec/shared/fields.py b/bioimageio/spec/shared/fields.py index 7f07e6981..293be4c63 100644 --- a/bioimageio/spec/shared/fields.py +++ b/bioimageio/spec/shared/fields.py @@ -323,15 +323,11 @@ def __init__(self, **super_kwargs): super().__init__(Integer(), **super_kwargs) 
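For illustration, a rough sketch of how the renamed and split callable-source fields are meant to behave (assuming this branch is installed; the source strings `my_models.unet.UNet` and `models/unet.py:UNet` are made up): a plain dotted import path deserializes to a `CallableFromModule` raw node, a `<file path>:<callable name>` string to a `CallableFromSourceFile` raw node, and the combined `CallableSource` union field is expected to accept either spelling.

# sketch only; the example source strings are invented
from bioimageio.spec.shared import fields, raw_nodes

# a dotted import path (no ':') -> CallableFromModule
module_src = fields.CallableFromModule().deserialize("my_models.unet.UNet")
assert isinstance(module_src, raw_nodes.CallableFromModule)
assert module_src.module_name == "my_models.unet"
assert module_src.callable_name == "UNet"

# '<file path>:<callable name>' -> CallableFromSourceFile
file_src = fields.CallableFromSourceFile().deserialize("models/unet.py:UNet")
assert isinstance(file_src, raw_nodes.CallableFromSourceFile)
assert file_src.callable_name == "UNet"

# the combined CallableSource union field should accept either spelling
print(fields.CallableSource().deserialize("my_models.unet.UNet"))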
-class CallableSource(String): +class CallableFromModule(String): @staticmethod def _is_import(path): return ":" not in path - @staticmethod - def _is_filepath(path): - return ":" in path - def _deserialize(self, *args, **kwargs) -> typing.Any: source_str: str = super()._deserialize(*args, **kwargs) if self._is_import(source_str): @@ -349,8 +345,26 @@ def _deserialize(self, *args, **kwargs) -> typing.Any: ) return raw_nodes.CallableFromModule(callable_name=object_name, module_name=module_name) + else: + raise ValidationError(source_str) - elif self._is_filepath(source_str): + def _serialize(self, value, attr, obj, **kwargs) -> typing.Optional[str]: + if value is None: + return None + elif isinstance(value, raw_nodes.CallableFromModule): + return f"{value.module_name}.{value.callable_name}" + else: + raise TypeError(f"{value} has unexpected type {type(value)}") + + +class CallableFromSourceFile(String): + @staticmethod + def _is_filepath(path): + return ":" in path + + def _deserialize(self, *args, **kwargs) -> typing.Any: + source_str: str = super()._deserialize(*args, **kwargs) + if self._is_filepath(source_str): *module_uri_parts, object_name = source_str.split(":") module_uri = ":".join(module_uri_parts).strip(":") @@ -376,14 +390,17 @@ def _deserialize(self, *args, **kwargs) -> typing.Any: def _serialize(self, value, attr, obj, **kwargs) -> typing.Optional[str]: if value is None: return None - elif isinstance(value, raw_nodes.CallableFromModule): - return f"{value.module_name}.{value.callable_name}" elif isinstance(value, raw_nodes.CallableFromSourceFile): return f"{value.source_file}:{value.callable_name}" else: raise TypeError(f"{value} has unexpected type {type(value)}") +class CallableSource(Union): + def __init__(self, **kwargs): + super().__init__([CallableFromModule(), CallableFromSourceFile()], **kwargs) + + class Kwargs(Dict): def __init__( self, From 91e6783c5fb484aabe9252c47d8442ad36e0456b Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 24 Nov 2022 23:06:33 +0100 Subject: [PATCH 28/40] set format_version as default applies only for creating an RDF raw node in code --- bioimageio/spec/rdf/v0_2/raw_nodes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bioimageio/spec/rdf/v0_2/raw_nodes.py b/bioimageio/spec/rdf/v0_2/raw_nodes.py index ac1fbd51f..5cc6c2964 100644 --- a/bioimageio/spec/rdf/v0_2/raw_nodes.py +++ b/bioimageio/spec/rdf/v0_2/raw_nodes.py @@ -165,4 +165,7 @@ def __post_init__(self): if self.type is missing: self.type = self.__class__.__name__.lower() + if self.format_version is missing: + self.format_version = get_args(FormatVersion)[-1] + super().__post_init__() From 8bdb9c94c3bcd76c5c95f6154c68e0f532b57672 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 24 Nov 2022 23:07:23 +0100 Subject: [PATCH 29/40] prohibit serializing a list from a string --- bioimageio/spec/shared/fields.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bioimageio/spec/shared/fields.py b/bioimageio/spec/shared/fields.py index 293be4c63..3157a3aa1 100644 --- a/bioimageio/spec/shared/fields.py +++ b/bioimageio/spec/shared/fields.py @@ -179,6 +179,12 @@ def __init__(self, instance: DocumentedField, *super_args, **super_kwargs): super().__init__(instance, *super_args, **super_kwargs) self.type_name += f"\\[{self.inner.type_name}\\]" # add type of list elements + def _serialize(self, value, attr, obj, **kwargs) -> typing.Optional[typing.List[typing.Any]]: + if isinstance(value, str): + raise TypeError("Avoiding bugs by prohibiting to serialize a list from a string.") + + return 
super()._serialize(value, attr, obj, **kwargs) + class Number(DocumentedField, marshmallow_fields.Number): pass From a89bf0751c95e8526c6ffc6e9156c59524ab689c Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 24 Nov 2022 23:11:24 +0100 Subject: [PATCH 30/40] remove specialized axes classes --- bioimageio/spec/workflow/v0_2/raw_nodes.py | 68 ++----- bioimageio/spec/workflow/v0_2/schema.py | 211 +++++++++++---------- 2 files changed, 129 insertions(+), 150 deletions(-) diff --git a/bioimageio/spec/workflow/v0_2/raw_nodes.py b/bioimageio/spec/workflow/v0_2/raw_nodes.py index 967562d28..b0d9f1db6 100644 --- a/bioimageio/spec/workflow/v0_2/raw_nodes.py +++ b/bioimageio/spec/workflow/v0_2/raw_nodes.py @@ -19,9 +19,19 @@ from typing_extensions import Literal, get_args # type: ignore FormatVersion = FormatVersion -DefaultType = Union[int, float, str, bool, list, dict, None] ParameterType = Literal["tensor", "int", "float", "string", "boolean", "list", "dict", "any"] -TYPE_NAME_MAP = {int: "int", float: "float", str: "string", bool: "boolean", list: "list", dict: "dict", None: "null"} + +DefaultType = Union[int, float, str, bool, list, dict, None] +# mapping of types of possible default values +DEFAULT_TYPE_NAME_MAP = { + int: "int", + float: "float", + str: "string", + bool: "boolean", + list: "list", + dict: "dict", + None: "null", +} # unit names from https://ngff.openmicroscopy.org/latest/#axes-md SpaceUnit = Literal[ @@ -82,6 +92,7 @@ # this Axis definition is compatible with the NGFF draft from October 24, 2022 # https://ngff.openmicroscopy.org/latest/#axes-md AxisType = Literal["batch", "channel", "index", "time", "space"] +ArbitraryAxes = Literal["arbitrary"] @dataclass @@ -94,62 +105,25 @@ class Axis: @dataclass -class BatchAxis(Axis): - type: Literal["batch"] = "batch" - name: _Missing = missing - description: _Missing = missing - unit: _Missing = missing - step: _Missing = missing - - -@dataclass -class ChannelAxis(Axis): - type: Literal["channel"] = "channel" - step: _Missing = missing - - -@dataclass -class IndexAxis(Axis): - type: Literal["index"] = "index" - name: Union[_Missing, str] = missing - unit: Union[_Missing, str] = missing - step: _Missing = missing - - -@dataclass -class SpaceAxis(Axis): - type: Literal["space"] = "space" - name: Literal["x", "y", "z"] = missing - unit: Union[_Missing, str, SpaceUnit] = missing - - -@dataclass -class TimeAxis(Axis): - type: Literal["time"] = "time" - name: Union[_Missing, str] = missing - unit: Union[_Missing, str, TimeUnit] = missing - - -@dataclass -class ParameterSpec(RawNode): +class Parameter(RawNode): name: str = missing type: ParameterType = missing description: Union[_Missing, str] = missing - axes: Union[_Missing, List[Axis]] = missing + axes: Union[_Missing, List[Axis], ArbitraryAxes] = missing @dataclass -class InputSpec(ParameterSpec): +class Input(Parameter): pass @dataclass -class OptionSpec(ParameterSpec): +class Option(Parameter): default: Union[_Missing, DefaultType] = missing @dataclass -class OutputSpec(ParameterSpec): +class Output(Parameter): pass @@ -157,6 +131,6 @@ class OutputSpec(ParameterSpec): class Workflow(_RDF): type: Literal["workflow"] = missing - inputs_spec: List[InputSpec] = missing - options_spec: List[OptionSpec] = missing - outputs_spec: List[OutputSpec] = missing + inputs: List[Input] = missing + options: List[Option] = missing + outputs: List[Output] = missing diff --git a/bioimageio/spec/workflow/v0_2/schema.py b/bioimageio/spec/workflow/v0_2/schema.py index 4f111f48d..e26623873 100644 --- 
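For illustration, a minimal sketch of what the new `format_version` default means when an RDF raw node is created in code rather than loaded from YAML (the field values below are made up; assumes this branch):

from bioimageio.spec.rdf.v0_2.raw_nodes import RDF

# format_version (and type) no longer need to be spelled out when constructing
# the raw node directly; __post_init__ fills in the latest supported version.
rdf = RDF(name="demo resource", description="an RDF raw node built in code")
print(rdf.type)            # -> "rdf"
print(rdf.format_version)  # -> latest RDF 0.2.x format version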
a/bioimageio/spec/workflow/v0_2/schema.py +++ b/bioimageio/spec/workflow/v0_2/schema.py @@ -1,6 +1,6 @@ import typing -from marshmallow import ValidationError, missing, validates, validates_schema +from marshmallow import ValidationError, validates, validates_schema from marshmallow.exceptions import SCHEMA from bioimageio.spec.rdf.v0_2.schema import RDF @@ -19,10 +19,24 @@ class _BioImageIOSchema(SharedBioImageIOSchema): class Axis(_BioImageIOSchema): - name = fields.String( - required=True, - bioimageio_description="A unique axis name (max 32 characters).", - validate=field_validators.Length(min=1, max=32), + # name = fields.String( + # required=True, + # bioimageio_description="A unique axis name (max 32 characters).", + # validate=field_validators.Length(min=1, max=32), + # ) + name = fields.Union( + [ + fields.String( + validate=(field_validators.Length(min=1, max=32), field_validators.ContainsNoneOf([","])), + bioimageio_description="Axis name. Indexed for channel axis, e.g. RGB -> RGB[0],RGB[1],RGB[2]", + ), + fields.List( + fields.String(validate=field_validators.Length(min=1, max=32)), + bioimageio_description="For channel axis only: Name per channel, e.g. [red, green, blue]", + ), + ], + bioimageio_maybe_required=True, + bioimageio_description="A unique axis name (max 32 characters)", ) type = fields.String( required=True, @@ -32,82 +46,77 @@ class Axis(_BioImageIOSchema): description = fields.String( validate=field_validators.Length(min=1, max=128), bioimageio_description="Description of axis (max 128 characters).", + bioimageio_maybe_required=True, ) - unit = fields.String(bioimageio_description="Physical unit of this axis.", bioimageio_maybe_required=True) - # Recommendations:\n\n for type: 'space' one of:\n\n\t{get_args(raw_nodes.SpaceUnit)}\n\n for type: 'time' one of:\n\n\t{get_args(raw_nodes.TimeUnit)}") - step = fields.Integer( - bioimageio_description="One 'pixel' along this axis corresponds to 'step'+'unit'. If specified 'unit' is mandatory." 
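For illustration, a rough sketch of the unified `Axis` schema that replaces the per-type axis schemas (assuming this branch; the axis values are made up): a space axis may carry `unit` and `step`, while fields that make no sense for a given axis type, such as `step` on a channel axis, are rejected by `validate_type_specifics`.

from marshmallow import ValidationError

from bioimageio.spec.workflow.v0_2 import schema

# a space axis with a physical step size per pixel
axis = schema.Axis().load({"type": "space", "name": "x", "unit": "micrometer", "step": 0.5})
print(axis)

# 'step' makes no sense for a channel axis and is rejected
try:
    schema.Axis().load({"type": "channel", "name": ["red", "green", "blue"], "step": 1.0})
except ValidationError as e:
    print(e.messages)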
- ) - - @validates_schema - def step_has_unit(self, data, **kwargs): - if "step" in data and not "unit" in data: - raise ValidationError("Missing 'unit' for specified 'step'.", "unit") - - -class BatchAxis(Axis): - class Meta: - exclude = ("name", "description", "unit", "step") - - type = fields.String(required=True, validate=field_validators.Equal("batch"), bioimageio_description="'batch'") - - -class ChannelAxis(Axis): - class Meta: - exclude = ("step",) - - type = fields.String(required=True, validate=field_validators.Equal("channel"), bioimageio_description="'channel'") - name = fields.Union( + unit = fields.Union( [ - fields.List(fields.String(validate=field_validators.Length(min=1, max=32))), - fields.String(validate=field_validators.Length(min=1, max=32)), + fields.String(validate=(field_validators.Length(min=1, max=32), field_validators.ContainsNoneOf([","]))), + fields.List( + fields.String(validate=field_validators.Length(min=1, max=32)), + bioimageio_description="For channel axis only: unit of data values (max 32 characters; per channel if list).", + ), ], - required=True, - bioimageio_description="A unique axis name (max 32 characters; per channel if list).", + bioimageio_description="Physical unit of this axis (max 32 characters).", + bioimageio_maybe_required=True, ) - unit = fields.Union( + # unit = fields.String(bioimageio_description="Physical unit of this axis.", bioimageio_maybe_required=True) + # Recommendations:\n\n for type: 'space' one of:\n\n\t{get_args(raw_nodes.SpaceUnit)}\n\n for type: 'time' one of:\n\n\t{get_args(raw_nodes.TimeUnit)}") + step = fields.Float( + bioimageio_description="One 'pixel' along this axis corresponds to 'step' 'unit'. (Invalid for channel axis.)" + ) + scaling_factor = fields.Union( [ - fields.List(fields.String(validate=field_validators.Length(min=1, max=32))), - fields.String(validate=field_validators.Length(min=1, max=32)), + fields.Float( + validate=field_validators.Range(min=0, min_inclusive=False), + bioimageio_description="Scaling factor for all channels.", + ), + fields.List( + fields.Float(validate=field_validators.Range(min=0, min_inclusive=False)), + bioimageio_description="Scaling factor per channel.", + ), ], - required=False, - bioimageio_description="Physical unit of data values (max 32 characters; per channel if list).", + bioimageio_description="For channel axis only: Scaling factor (per channel). Values are given in 'scaling_factor' 'unit'.", ) + @validates_schema + def step_has_unit(self, data, **kwargs): + if "step" in data and not "unit" in data: + raise ValidationError("Missing 'unit' for specified 'step'.", "unit") -class IndexAxis(Axis): - class Meta: - exclude = ("step", "unit") - - type = fields.String(required=True, validate=field_validators.Equal("index"), bioimageio_description="'index'") - - -class SpaceAxis(Axis): - name = fields.String( - validate=field_validators.OneOf(["x", "y", "z"]), - required=True, - bioimageio_description="One of: ['x', 'y', 'z'].", - ) - type = fields.String(required=True, validate=field_validators.Equal("space"), bioimageio_description="'space'") - - @validates("unit") - def recommend_unit(self, value: str): - recommended_units = get_args(raw_nodes.SpaceUnit) - if not value in recommended_units: - self.warn("unit", f"unknown space unit {value}. 
Recommend units are: {recommended_units}") + @validates_schema + def validate_type_specifics(self, data, **kwargs): + type_ = data.get("type") + unit = data.get("unit") + for invalid in dict( + batch=["scaling_factor", "step", "unit", "description", "name"], + channel=["step"], + index=["unit", "step", "scaling_factor"], + space=["scaling_factor"], + time=["scaling_factor"], + ).get(type_, []): + if invalid in data: + raise ValidationError(f"'{invalid}' invalid for {type_} axis") + + if type_ != "channel" and isinstance(data.get("name"), list): + raise ValidationError( + f"A list of names is only valid for axis type channel, not axis type {data.get('type')}." + ) + if type_ == "space": + if data.get("name") not in "xyz": + raise ValidationError("For a space axis only the names 'x', 'y', or 'z' are allowed.") -class TimeAxis(Axis): - type = fields.String(required=True, validate=field_validators.Equal("time"), bioimageio_description="'time'") + recommended_units = get_args(raw_nodes.SpaceUnit) + if unit not in recommended_units: + self.warn("unit", f"unknown unit '{unit}' for space axis. Recommend units are: {recommended_units}.") - @validates("unit") - def recommend_unit(self, value: str): - recommended_units = get_args(raw_nodes.TimeUnit) - if not value in recommended_units: - self.warn("unit", f"unknown time unit {value}. Recommend units are: {recommended_units}") + if type_ == "time": + recommended_units = get_args(raw_nodes.TimeUnit) + if unit not in recommended_units: + self.warn("unit", f"unknown unit '{unit}' for time axis. Recommend units are: {recommended_units}.") -class ParameterSpec(_BioImageIOSchema): +class Parameter(_BioImageIOSchema): name = fields.String( required=True, bioimageio_description="Parameter name. No duplicates are allowed.", @@ -117,16 +126,14 @@ class ParameterSpec(_BioImageIOSchema): validate=field_validators.OneOf(get_args(raw_nodes.ParameterType)), bioimageio_description=f"One of: {get_args(raw_nodes.ParameterType)}", ) - axes = fields.List( - fields.Union( - [ - fields.Nested(BatchAxis()), - fields.Nested(ChannelAxis()), - fields.Nested(IndexAxis()), - fields.Nested(SpaceAxis()), - fields.Nested(TimeAxis()), - ] - ), + axes = fields.Union( + [ + fields.List(fields.Nested(Axis())), + fields.String( + bioimageio_description="Arbitrary combination of valid axis types.", + validate=field_validators.Equal(get_args(raw_nodes.ArbitraryAxes)[0]), + ), + ], required=False, bioimageio_maybe_required=True, bioimageio_description="Axis specifications (only required for type 'tensor').", @@ -143,11 +150,11 @@ def has_axes_if_tensor(self, data, **kwargs): raise ValidationError("'axes' required for input type 'tensor'.") -class InputSpec(ParameterSpec): +class Input(Parameter): pass -class OptionSpec(ParameterSpec): +class Option(Parameter): default = fields.Raw( required=True, bioimageio_description="Default value compatible with type given by `type` field." @@ -166,14 +173,14 @@ def default_has_compatible_type(self, data, **kwargs): return default_type = type(data["default"]) - type_name = raw_nodes.TYPE_NAME_MAP[default_type] + type_name = raw_nodes.DEFAULT_TYPE_NAME_MAP[default_type] if type_name != input_type_name: raise ValidationError( f"Default value of type {default_type} (type name: {type_name}) does not match type: {input_type_name}" ) -class OutputSpec(ParameterSpec): +class Output(Parameter): pass @@ -186,26 +193,26 @@ class Workflow(_BioImageIOSchema, RDF): _optional*_ with an asterisk indicates the field is optional depending on the value in another field. 
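For illustration, a minimal sketch of the parameter-level checks (assuming this branch; parameter names and values are made up): an option's `default` must be compatible with its declared `type`, and a tensor-typed parameter must describe its `axes`.

from marshmallow import ValidationError

from bioimageio.spec.workflow.v0_2 import schema

# the default of an option must be compatible with its declared type
opt = schema.Option().load({"name": "threshold", "type": "float", "default": 0.5})

try:
    schema.Option().load({"name": "threshold", "type": "float", "default": "0.5"})
except ValidationError as e:
    print(e.messages)  # a string default does not match type 'float'

# a tensor-typed parameter has to describe its axes
try:
    schema.Input().load({"name": "raw", "type": "tensor"})
except ValidationError as e:
    print(e.messages)  # 'axes' required for input type 'tensor'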
""" - inputs_spec = fields.List( - fields.Nested(InputSpec()), + inputs = fields.List( + fields.Nested(Input()), required=True, bioimageio_description="Describes the inputs expected by this workflow.", ) @staticmethod - def verify_param_list(params: typing.Any) -> typing.List[typing.Union[raw_nodes.ParameterSpec]]: - if not isinstance(params, list) or not all(isinstance(v, raw_nodes.ParameterSpec) for v in params): + def verify_param_list(params: typing.Any) -> typing.List[raw_nodes.Parameter]: + if not isinstance(params, list) or not all(isinstance(v, raw_nodes.Parameter) for v in params): raise ValidationError("Could not check for duplicate parameter names due to another validation error.") return params @staticmethod def check_for_duplicate_param_names( - params: typing.List[typing.Union[raw_nodes.ParameterSpec]], param_name: str, field_name=SCHEMA + params: typing.List[typing.Union[raw_nodes.Parameter]], param_name: str, field_name=SCHEMA ): names = set() for t in params: - if not isinstance(t, raw_nodes.ParameterSpec): + if not isinstance(t, raw_nodes.Parameter): raise ValidationError( f"Could not check for duplicate {param_name} name due to other validation errors." ) @@ -215,8 +222,8 @@ def check_for_duplicate_param_names( names.add(t.name) - options_spec = fields.List( - fields.Nested(OptionSpec()), + options = fields.List( + fields.Nested(Option()), required=True, bioimageio_description="Describes the options that may be given to this workflow.", ) @@ -225,40 +232,38 @@ def check_for_duplicate_param_names( def no_duplicate_input_and_option_names(self, data, **kwargs): if not isinstance(data, dict): return - ipts = data.get("inputs_spec", []) - opts = data.get("options_spec", []) + ipts = data.get("inputs", []) + opts = data.get("options", []) if isinstance(ipts, list) and isinstance(opts, list): - self.check_for_duplicate_param_names( - self.verify_param_list(ipts + opts), "input/option", "inputs_spec/options_spec" - ) + self.check_for_duplicate_param_names(self.verify_param_list(ipts + opts), "input/option", "inputs/options") - outputs_spec = fields.List( - fields.Nested(OutputSpec()), + outputs = fields.List( + fields.Nested(Output()), validate=field_validators.Length(min=1), bioimageio_description="Describes the outputs of this workflow.", ) - @validates("outputs_spec") - def no_duplicate_output_names(self, outs: typing.List[raw_nodes.OutputSpec]): - self.check_for_duplicate_param_names(self.verify_param_list(outs), "output_spec") + @validates("outputs") + def no_duplicate_output_names(self, outs: typing.List[raw_nodes.Output]): + self.check_for_duplicate_param_names(self.verify_param_list(outs), "outputs") @staticmethod def get_initial_reference_names(data) -> typing.Set[str]: refs = {"${{ self.rdf_source }}"} - inputs = data.get("inputs_spec") + inputs = data.get("inputs") if not isinstance(inputs, list): return refs for ipt in inputs: - if isinstance(ipt, raw_nodes.InputSpec): + if isinstance(ipt, raw_nodes.Input): refs.add(f"${{{{ self.inputs.{ipt.name} }}}}") - options = data.get("options_spec") + options = data.get("options") if not isinstance(options, list): return refs for opt in options: - if isinstance(opt, raw_nodes.OptionSpec): + if isinstance(opt, raw_nodes.Option): refs.add(f"${{{{ self.options.{opt.name} }}}}") return refs From 5670cbcf36e68cce060239572f0dffc6b0a84ad9 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 24 Nov 2022 23:12:50 +0100 Subject: [PATCH 31/40] remove redundant brackets --- scripts/generate_weight_formats_overview.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/scripts/generate_weight_formats_overview.py b/scripts/generate_weight_formats_overview.py index e636e7cb0..259e1136b 100644 --- a/scripts/generate_weight_formats_overview.py +++ b/scripts/generate_weight_formats_overview.py @@ -23,7 +23,7 @@ def parse_args(): - p = ArgumentParser(description=("script that generates weights formats overview")) + p = ArgumentParser(description="script that generates weights formats overview") p.add_argument("command", choices=["check", "generate"]) args = p.parse_args() From 9b82e900ead2919cbc91cf1b4c27df10a7bd5368 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 25 Nov 2022 10:16:22 +0100 Subject: [PATCH 32/40] update workflow tests --- example_specs/workflows/dummy/rdf.yaml | 55 ----------- example_specs/workflows/hello.yaml | 39 ++++++++ .../hpa/single_cell_classification.yaml | 91 ------------------ .../workflows/stardist/stardist_example.yaml | 94 ------------------- tests/conftest.py | 2 +- tests/test_workflow_rdf.py | 24 +---- 6 files changed, 42 insertions(+), 263 deletions(-) delete mode 100644 example_specs/workflows/dummy/rdf.yaml create mode 100644 example_specs/workflows/hello.yaml delete mode 100644 example_specs/workflows/hpa/single_cell_classification.yaml delete mode 100644 example_specs/workflows/stardist/stardist_example.yaml diff --git a/example_specs/workflows/dummy/rdf.yaml b/example_specs/workflows/dummy/rdf.yaml deleted file mode 100644 index ff3b9c7fc..000000000 --- a/example_specs/workflows/dummy/rdf.yaml +++ /dev/null @@ -1,55 +0,0 @@ -name: dummy workflow -description: A workflow to produce some nonsense -format_version: 0.2.3 -type: workflow - -inputs_spec: -- name: shape - type: list - description: two integers to describe a 2d shape - -options_spec: -- name: threshold - type: float - default: 0.5 - -outputs_spec: -- name: nonsense - type: tensor - axes: - - name: fantasy time - type: time - - name: x - type: space - -steps: -- id: step0 - op: generate_random_uniform_tensor - inputs: ["${{ self.inputs.shape }}", ['fantasy time', 'x']] -# options: -# low: 0 -# high: 1 - outputs: [tensor] -- id: step1 - op: binarize - inputs: ["${{ step0.outputs.tensor }}", "${{ self.options.threshold }}"] - outputs: [binarized] -- op: log - options: - log_level: 50 - threshold: "${{ self.options.threshold }}" - original: "${{ step0.outputs.tensor }}" - binarized: "${{ step1.outputs.binarized }}" - - -test_steps: -- id: wf - op: run_workflow - inputs: ["${{ self.rdf_source }}"] - options: - inputs: [&test-shape [2, 3]] - options: - threshold: 0.5 - outputs: [out0] -- op: assert_shape - inputs: ["${{ wf.outputs.out0 }}", *test-shape] diff --git a/example_specs/workflows/hello.yaml b/example_specs/workflows/hello.yaml new file mode 100644 index 000000000..5a740f61e --- /dev/null +++ b/example_specs/workflows/hello.yaml @@ -0,0 +1,39 @@ +name: dummy workflow printing msg +description: This dummy workflow is intended as a demonstration and for testing. +type: workflow +format_version: 0.2.3 +cite: +- text: BioImage.IO + url: 'https://doi.org/10.1101/2022.06.07.495102' + +inputs: [] + +options: +- name: msg + type: string + description: Message + default: Hello! 
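For illustration, a rough sketch of loading a workflow RDF in the new `inputs`/`options`/`outputs` form through the schema (assuming this branch; the dict below is a trimmed-down, made-up variant of hello.yaml):

from bioimageio.spec.workflow.v0_2.schema import Workflow

# a trimmed-down workflow RDF given as a plain dict instead of a YAML file
data = {
    "name": "dummy workflow printing msg",
    "description": "minimal example",
    "format_version": "0.2.3",
    "type": "workflow",
    "inputs": [],
    "options": [
        {"name": "msg", "type": "string", "description": "Message", "default": "Hello!"},
    ],
    "outputs": [
        {"name": "msg", "type": "string", "description": "A possibly manipulated message."},
    ],
}

wf = Workflow().load(data)
print([opt.name for opt in wf.options])  # -> ['msg']
print(wf.outputs[0].type)                # -> 'string'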
+- name: tensor_a + type: tensor + description: tensor_a whose shape is added to message + default: null + axes: arbitrary +- name: tensor_b + type: tensor + description: tensor_b whose shape is added to message + default: null + axes: + - type: batch + - type: space + name: x + description: x dimension + step: 1.5 + unit: millimeter + - type: index + name: demo index + description: a special index axis + +outputs: +- name: msg + type: string + description: A possibly manipulated message. diff --git a/example_specs/workflows/hpa/single_cell_classification.yaml b/example_specs/workflows/hpa/single_cell_classification.yaml deleted file mode 100644 index f3e9773c1..000000000 --- a/example_specs/workflows/hpa/single_cell_classification.yaml +++ /dev/null @@ -1,91 +0,0 @@ -name: HPA Single-cell Classification Example Workflow -description: A workflow for running HPA single-cell classification -format_version: 0.2.3 -type: workflow - -inputs_spec: -- name: nuclei - type: tensor - axes: - - type: batch - - name: gray scale - type: channel - - name: x - type: space - - name: y - type: space -- name: protein - type: tensor - axes: - - type: batch - - name: gray scale - type: channel - - name: x - type: space - - name: y - type: space - -options_spec: -- name: seg_prep - type: boolean - default: false - - -outputs_spec: -- name: cells - type: tensor - axes: - - type: batch - - name: gray scale - type: channel - - name: x - type: space - - name: y - type: space -- name: scores - type: tensor - axes: - - type: batch - - name: gray scale - type: channel - - name: x - type: space - - name: y - type: space - -steps: -- id: segmentation - op: model_inference - inputs: [inputs.nuclei] # take the first output of step 1 (id: data) as the only input - options: - rdf_source: conscientious-seashell - preprocessing: ${{ self.options.seg_prep }} - postprocessing: false - outputs: [cells] -- id: classification - op: model_inference - inputs: [inputs.protein, segmentation.outputs.cells] # take the second output of step1 and the output of step 2 - options: - rdf_source: straightforward-crocodile - preprocessing: true - postprocessing: false - outputs: [scores] -- op: select_outputs - inputs: [segmentation.outputs.cells, classification.outputs.scores] - -test_steps: -- op: load_tensors - id: test_tensors - options: - sources: [nuclei.npy, protein.npy, cells.npy, scores.npy] - outputs: [nuclei, protein, cells, scores] -- op: run_workflow - id: workflow - inputs: [test_tensors.outputs.nuclei, test_tensors.outputs.protein] - options: - rdf_source: ${{ self.rdf_source }} - outputs: [cells, scores] -- op: assert_close - inputs: [test_tensors.outputs.cells, workflow.outputs.cells] -- op: assert_close - inputs: [test_tensors.outputs.scores, workflow.outputs.scores] diff --git a/example_specs/workflows/stardist/stardist_example.yaml b/example_specs/workflows/stardist/stardist_example.yaml deleted file mode 100644 index 46a2465e5..000000000 --- a/example_specs/workflows/stardist/stardist_example.yaml +++ /dev/null @@ -1,94 +0,0 @@ -name: StarDist Example Workflow -description: A workflow for running stardist -format_version: 0.2.3 -type: workflow - -inputs_spec: -- name: raw - type: tensor - description: image with star-convex objects - axes: - - type: batch - - name: gray scale - type: channel - - name: x - type: space - - name: y - type: space - -options_spec: -- name: diameter - type: float - default: 2.3 - -outputs_spec: -- name: labels - type: tensor - axes: - - type: batch - - name: label id - type: channel - - name: x 
- type: space - - name: y - type: space -- name: coord - type: list -- name: points - type: list -- name: prob - type: tensor - axes: - - type: batch - - name: probability - type: channel - - name: x - type: space - - name: y - type: space - -steps: -- op: zero_mean_unit_variance -- op: model_inference - options: - rdf_source: fearless-crab - preprocessing: false # disable the preprocessing - postprocessing: false # disable the postprocessing -- op: stardist_postprocessing - options: - diameter: "${{ self.options.diameter }}" - -test_steps: -- id: test_tensors - op: load_tensors - options: - sources: - - raw.npy - - labels.npy - - coord.npy - - points.npy - - prob.npy - outputs: - - raw - - labels - - coord - - points - - prob -- id: workflow - op: run_workflow - inputs: [ "${{ test_tensors.outputs.raw }}" ] - options: - rdf_source: "${{ self.rdf_source }}" - outputs: - - labels - - coord - - points - - prob -- op: assert_close - inputs: ["${{ test_tensors.outputs.labels }}", "${{ workflow.outputs.labels }}"] -- op: assert_close - inputs: ["${{ test_tensors.outputs.coord }}", "${{ workflow.outputs.coord }}"] -- op: assert_close - inputs: ["${{ test_tensors.outputs.points }}", "${{ workflow.outputs.points }}"] -- op: assert_close - inputs: ["${{ test_tensors.outputs.prob }}", "${{ workflow.outputs.prob }}"] diff --git a/tests/conftest.py b/tests/conftest.py index d8afd3099..ae96a985d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -125,4 +125,4 @@ def hpa_workflow_rdf(): @pytest.fixture def dummy_workflow_rdf(): - return pathlib.Path(__file__).parent / "../example_specs/workflows/dummy/rdf.yaml" + return pathlib.Path(__file__).parent / "../example_specs/workflows/hello.yaml" diff --git a/tests/test_workflow_rdf.py b/tests/test_workflow_rdf.py index b55f3ddde..54ed35f39 100644 --- a/tests/test_workflow_rdf.py +++ b/tests/test_workflow_rdf.py @@ -5,26 +5,6 @@ from bioimageio.spec.workflow import raw_nodes -def test_workflow_rdf_stardist_example(stardist_workflow_rdf): - from bioimageio.spec.workflow.schema import Workflow - - data = yaml.load(stardist_workflow_rdf) - - workflow = Workflow().load(data) - assert isinstance(workflow, raw_nodes.Workflow) - assert workflow.steps[0].op == "zero_mean_unit_variance" - - -def test_workflow_rdf_hpa_example(hpa_workflow_rdf): - from bioimageio.spec.workflow.schema import Workflow - - data = yaml.load(hpa_workflow_rdf) - - workflow = Workflow().load(data) - assert isinstance(workflow, raw_nodes.Workflow) - assert workflow.outputs_spec[0].name == "cells" - - def test_dummy_workflow_rdf(dummy_workflow_rdf): from bioimageio.spec.workflow.schema import Workflow @@ -38,9 +18,9 @@ def test_invalid_kwarg_name_duplicate(dummy_workflow_rdf): from bioimageio.spec.workflow.schema import Workflow data = yaml.load(dummy_workflow_rdf) - data["options_spec"].append(data["options_spec"][0]) + data["options"].append(data["options"][0]) with pytest.raises(ValidationError) as e: Workflow().load(data) - assert e.value.messages == {"inputs_spec/options_spec": ["Duplicate input/option name 'threshold' not allowed."]} + assert e.value.messages == {"inputs/options": ["Duplicate input/option name 'msg' not allowed."]} From 024366533c61c38db35d43e10a02bc0822591ba6 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 28 Nov 2022 09:41:58 +0100 Subject: [PATCH 33/40] rename DEFAULT_TYPE_NAME_MAP --- bioimageio/spec/workflow/v0_2/raw_nodes.py | 12 ++++++++++-- bioimageio/spec/workflow/v0_2/schema.py | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git 
a/bioimageio/spec/workflow/v0_2/raw_nodes.py b/bioimageio/spec/workflow/v0_2/raw_nodes.py index b0d9f1db6..b1574849f 100644 --- a/bioimageio/spec/workflow/v0_2/raw_nodes.py +++ b/bioimageio/spec/workflow/v0_2/raw_nodes.py @@ -23,7 +23,7 @@ DefaultType = Union[int, float, str, bool, list, dict, None] # mapping of types of possible default values -DEFAULT_TYPE_NAME_MAP = { +TYPE_NAMES = { int: "int", float: "float", str: "string", @@ -32,7 +32,15 @@ dict: "dict", None: "null", } - +TYPE_NAME_TYPES = dict( + int=int, + float=float, + string=str, + boolean=bool, + list=list, + dict=dict, + null=None, +) # unit names from https://ngff.openmicroscopy.org/latest/#axes-md SpaceUnit = Literal[ "angstrom", diff --git a/bioimageio/spec/workflow/v0_2/schema.py b/bioimageio/spec/workflow/v0_2/schema.py index e26623873..309846ef5 100644 --- a/bioimageio/spec/workflow/v0_2/schema.py +++ b/bioimageio/spec/workflow/v0_2/schema.py @@ -173,7 +173,7 @@ def default_has_compatible_type(self, data, **kwargs): return default_type = type(data["default"]) - type_name = raw_nodes.DEFAULT_TYPE_NAME_MAP[default_type] + type_name = raw_nodes.TYPE_NAMES[default_type] if type_name != input_type_name: raise ValidationError( f"Default value of type {default_type} (type name: {type_name}) does not match type: {input_type_name}" From 91d141fad80c5d07006a349107fdcbfe007f7379 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 28 Nov 2022 14:53:38 +0100 Subject: [PATCH 34/40] rename ArbitraryAxes to UnknownAxes --- bioimageio/spec/workflow/v0_2/raw_nodes.py | 4 ++-- bioimageio/spec/workflow/v0_2/schema.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bioimageio/spec/workflow/v0_2/raw_nodes.py b/bioimageio/spec/workflow/v0_2/raw_nodes.py index b1574849f..34d8fa477 100644 --- a/bioimageio/spec/workflow/v0_2/raw_nodes.py +++ b/bioimageio/spec/workflow/v0_2/raw_nodes.py @@ -100,7 +100,7 @@ # this Axis definition is compatible with the NGFF draft from October 24, 2022 # https://ngff.openmicroscopy.org/latest/#axes-md AxisType = Literal["batch", "channel", "index", "time", "space"] -ArbitraryAxes = Literal["arbitrary"] +UnknownAxes = Literal["unknown"] @dataclass @@ -117,7 +117,7 @@ class Parameter(RawNode): name: str = missing type: ParameterType = missing description: Union[_Missing, str] = missing - axes: Union[_Missing, List[Axis], ArbitraryAxes] = missing + axes: Union[_Missing, List[Axis], UnknownAxes] = missing @dataclass diff --git a/bioimageio/spec/workflow/v0_2/schema.py b/bioimageio/spec/workflow/v0_2/schema.py index 309846ef5..cc3b64025 100644 --- a/bioimageio/spec/workflow/v0_2/schema.py +++ b/bioimageio/spec/workflow/v0_2/schema.py @@ -130,8 +130,8 @@ class Parameter(_BioImageIOSchema): [ fields.List(fields.Nested(Axis())), fields.String( - bioimageio_description="Arbitrary combination of valid axis types.", - validate=field_validators.Equal(get_args(raw_nodes.ArbitraryAxes)[0]), + bioimageio_description="Arbitrary or unknown combination of valid axis types.", + validate=field_validators.Equal(get_args(raw_nodes.UnknownAxes)[0]), ), ], required=False, From a3d97c81a1360cf5c37f32856debceea54496882 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 30 Nov 2022 15:39:24 +0100 Subject: [PATCH 35/40] make nested_errors optional --- bioimageio/spec/shared/common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bioimageio/spec/shared/common.py b/bioimageio/spec/shared/common.py index b2530df81..b73a59aaf 100644 --- a/bioimageio/spec/shared/common.py +++ 
b/bioimageio/spec/shared/common.py @@ -133,7 +133,9 @@ class ValidationSummary(TypedDict): bioimageio_spec_version: str error: Union[None, str, Dict[str, Any]] name: str - nested_errors: Dict[str, dict] # todo: mark as not required: typing_extensions.NotRequired (typing py 3.11) + nested_errors: Optional[ + Dict[str, dict] + ] # todo: mark as not required: typing_extensions.NotRequired (typing py 3.11) source_name: str status: Union[Literal["passed", "failed"], str] traceback: Optional[List[str]] From b7b51a9b288ca664261895363cddbfc3dcdb4da6 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 6 Dec 2022 14:54:59 +0100 Subject: [PATCH 36/40] assert for mypy --- bioimageio/spec/v.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bioimageio/spec/v.py b/bioimageio/spec/v.py index f03752036..17f6e53da 100644 --- a/bioimageio/spec/v.py +++ b/bioimageio/spec/v.py @@ -3,3 +3,5 @@ with (pathlib.Path(__file__).parent / "VERSION").open() as f: __version__ = json.load(f)["version"] + +assert isinstance(__version__, str) From eb4e3f8a70de6a0f911a719a887707d20c76030c Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 6 Dec 2022 14:55:48 +0100 Subject: [PATCH 37/40] some aliases for backward compatibility --- bioimageio/spec/shared/raw_nodes.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bioimageio/spec/shared/raw_nodes.py b/bioimageio/spec/shared/raw_nodes.py index e7224f6e6..0fcad7308 100644 --- a/bioimageio/spec/shared/raw_nodes.py +++ b/bioimageio/spec/shared/raw_nodes.py @@ -206,3 +206,8 @@ class ResolvedCallableFromSourceFile(CallableFromSourceFile): CallableSource = Union[ CallableFromModule, CallableFromSourceFile, ResolvedCallableFromSourceFile, LocalCallableFromModule ] +# some aliases for backward compatibility +ImportableModule = CallableFromModule +LocalImportableModule = LocalCallableFromModule +ImportableSourceFile = CallableFromSourceFile +ResolvedImportableSourceFile = ResolvedCallableFromSourceFile From 1102f6ac042ab52871ae8f5bf78db21596e55b87 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 8 Dec 2022 10:37:48 +0100 Subject: [PATCH 38/40] add AXIS_LETTER_TO_NAME and AXIS_NAME_TO_LETTER --- bioimageio/spec/shared/common.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bioimageio/spec/shared/common.py b/bioimageio/spec/shared/common.py index b73a59aaf..9cf3c2ea3 100644 --- a/bioimageio/spec/shared/common.py +++ b/bioimageio/spec/shared/common.py @@ -211,3 +211,7 @@ def nested_default_dict_as_nested_dict(nested_dd): return [nested_default_dict_as_nested_dict(value) for value in nested_dd] else: return nested_dd + + +AXIS_LETTER_TO_NAME = dict(b="batch", t="time", c="channel", i="index") +AXIS_NAME_TO_LETTER = {v: k for k, v in AXIS_LETTER_TO_NAME.items()} From b66798a58d2a4e47f040a50919d7c74528a43534 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 9 Feb 2023 12:25:49 +0100 Subject: [PATCH 39/40] update hello workflow example --- example_specs/workflows/hello.yaml | 52 ++++++++++++------------------ 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/example_specs/workflows/hello.yaml b/example_specs/workflows/hello.yaml index 5a740f61e..479898310 100644 --- a/example_specs/workflows/hello.yaml +++ b/example_specs/workflows/hello.yaml @@ -1,39 +1,27 @@ -name: dummy workflow printing msg +cite: +- {text: BioImage.IO, url: 'https://doi.org/10.1101/2022.06.07.495102'} description: This dummy workflow is intended as a demonstration and for testing. 
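For illustration, a small sketch of the new axis-name mappings (assuming this branch): they translate between the single-letter axes notation used by model RDFs and the spelled-out axis names used for workflow parameters; spatial letters such as 'y' and 'x' have no entry, so the sketch below passes them through unchanged.

from bioimageio.spec.shared.common import AXIS_LETTER_TO_NAME, AXIS_NAME_TO_LETTER

letters = "bcyx"  # a made-up, model-style axes string
names = [AXIS_LETTER_TO_NAME.get(letter, letter) for letter in letters]
print(names)  # -> ['batch', 'channel', 'y', 'x']

assert AXIS_NAME_TO_LETTER["batch"] == "b"
assert AXIS_NAME_TO_LETTER["index"] == "i"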
-type: workflow format_version: 0.2.3 -cite: -- text: BioImage.IO - url: 'https://doi.org/10.1101/2022.06.07.495102' - +icon: ⚙ +id: bioimageio/hello inputs: [] - +license: MIT +name: dummy workflow printing msg options: -- name: msg - type: string - description: Message - default: Hello! -- name: tensor_a - type: tensor - description: tensor_a whose shape is added to message +- {default: Hello!, description: Message, name: msg, type: string} +- {axes: unknown, default: null, description: tensor_a whose shape is added to message, + name: tensor_a, type: tensor} +- axes: + - {type: batch} + - {description: x dimension, name: x, step: 1.5, type: space, unit: millimeter} + - {description: a special index axis, name: demo index, type: index} default: null - axes: arbitrary -- name: tensor_b - type: tensor description: tensor_b whose shape is added to message - default: null - axes: - - type: batch - - type: space - name: x - description: x dimension - step: 1.5 - unit: millimeter - - type: index - name: demo index - description: a special index axis - + name: tensor_b + type: tensor outputs: -- name: msg - type: string - description: A possibly manipulated message. +- {description: A possibly manipulated message., name: msg, type: string} +rdf_source: https://raw.githubusercontent.com/bioimage-io/workflows-bioimage-io-python/main/src/bioimageio/workflows/static/workflow_rdfs/hello.yaml +tags: [workflow] +type: workflow +version: 0.1.0 From 052c5538dfc9aa5ca8d004595cc7887c3b34cf62 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 15 Mar 2023 15:59:36 +0100 Subject: [PATCH 40/40] remove +\n from CLI help --- bioimageio/spec/__main__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bioimageio/spec/__main__.py b/bioimageio/spec/__main__.py index 95fbc8802..d9bb1a03f 100644 --- a/bioimageio/spec/__main__.py +++ b/bioimageio/spec/__main__.py @@ -15,7 +15,9 @@ enrich_partial_rdf_with_imjoy_plugin = None partner_help = "" else: - partner_help = f"\n+\nbioimageio.spec.partner {__version__}\nimplementing:\n\tpartner collection RDF {collection.format_version}" + partner_help = ( + f"\nbioimageio.spec.partner {__version__}\nimplementing:\n\tpartner collection RDF {collection.format_version}" + ) help_version = ( f"bioimageio.spec {__version__}"