Skip to content

Commit

Permalink
Engine: fix bug that allowed non-storable inputs to be passed to proc…
Browse files Browse the repository at this point in the history
…ess (#5532)

The basic assumption for a `Process` in `aiida-core` is that all of its
inputs should be storable in the database as nodes. Under the current
link model, this means that they should be instances of the `Data` class
or subclasses thereof. There is a noticeable exception for ports that
are explicitly marked as `non_db=True`, in which case the value is not
linked as a node, but is stored as an attribute directly on the process
node itself, or not stored whatsoever.

This basic rule was never explicitly enforced, which made it possible to
define processes that would happily take non-storable inputs. The input
would not get stored in the database, but would be available within the
processes lifetimes from the `inputs` property allowing it to be used.
This will most likely result into unintentional loss of provenance.

The reason is that the default `valid_type` of the top-level inputs
namespace of the `Process` class was never being set to `Data`. This
meant that any type would be excepted for a `Process` and all its
subclasses unless the valid type of a port was explicitly overridden.
This meant that for normal dynamic namespaces, even non-storable types
would be accepted just fine. Setting `valid_type=Data` for the input
namespace of the `Process` class fixes the problem therefore.
  • Loading branch information
sphuber authored May 21, 2022
1 parent a8ea6b7 commit 5c1eb3f
Show file tree
Hide file tree
Showing 4 changed files with 460 additions and 398 deletions.
2 changes: 2 additions & 0 deletions aiida/engine/processes/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ def define(cls, spec: ProcessSpec) -> None: # type: ignore[override]
default='CALL',
help='The label to use for the `CALL` link if the process is called by another process.'
)
spec.inputs.valid_type = orm.Data
spec.inputs.dynamic = False # Settings a ``valid_type`` automatically makes it dynamic, so we reset it again
spec.exit_code(1, 'ERROR_UNSPECIFIED', message='The process has failed with an unspecified error.')
spec.exit_code(2, 'ERROR_LEGACY_FAILURE', message='The process failed with legacy failure mode.')
spec.exit_code(10, 'ERROR_INVALID_OUTPUT', message='The process returned an invalid output.')
Expand Down
26 changes: 13 additions & 13 deletions tests/engine/processes/test_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ class LazyProcessNamespace(Process):
@classmethod
def define(cls, spec):
super().define(spec)
spec.input_namespace('namespace')
spec.input_namespace('namespace.nested')
spec.input('namespace.nested.bird')
spec.input('namespace.a')
spec.input('namespace.c')
spec.input_namespace('namespace', non_db=True)
spec.input_namespace('namespace.nested', non_db=True)
spec.input('namespace.nested.bird', non_db=True)
spec.input('namespace.a', non_db=True)
spec.input('namespace.c', non_db=True)


class SimpleProcessNamespace(Process):
Expand All @@ -58,9 +58,9 @@ class SimpleProcessNamespace(Process):
@classmethod
def define(cls, spec):
super().define(spec)
spec.input_namespace('namespace.nested', dynamic=True)
spec.input('namespace.a', valid_type=int)
spec.input('namespace.c', valid_type=dict)
spec.input_namespace('namespace.nested', dynamic=True, non_db=True)
spec.input('namespace.a', valid_type=int, non_db=True)
spec.input('namespace.c', valid_type=dict, non_db=True)


class NestedNamespaceProcess(Process):
Expand All @@ -69,9 +69,9 @@ class NestedNamespaceProcess(Process):
@classmethod
def define(cls, spec):
super().define(spec)
spec.input('nested.namespace.int', valid_type=int, required=True)
spec.input('nested.namespace.float', valid_type=float, required=True)
spec.input('nested.namespace.str', valid_type=str, required=False)
spec.input('nested.namespace.int', valid_type=int, required=True, non_db=True)
spec.input('nested.namespace.float', valid_type=float, required=True, non_db=True)
spec.input('nested.namespace.str', valid_type=str, required=False, non_db=True)


class MappingData(Mapping, orm.Data):
Expand Down Expand Up @@ -398,15 +398,15 @@ class ProcessOne(Process):
@classmethod
def define(cls, spec):
super().define(spec)
spec.input('port', valid_type=int, default=1)
spec.input('port', valid_type=int, default=1, non_db=True)

class ProcessTwo(Process):
"""Process with nested required ports to check the update functionality."""

@classmethod
def define(cls, spec):
super().define(spec)
spec.input('port', valid_type=int, default=2)
spec.input('port', valid_type=int, default=2, non_db=True)

builder_one = ProcessOne.get_builder()
assert builder_one.port == 1
Expand Down
21 changes: 21 additions & 0 deletions tests/engine/test_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,3 +433,24 @@ def define(cls, spec):
# If the ``namespace`` does not exist, for example because it is slightly misspelled, a ``KeyError`` is raised
with pytest.raises(KeyError):
process.exposed_outputs(node_child, ChildProcess, namespace='cildh')


class TestValidateDynamicNamespaceProcess(Process):
"""Simple process with dynamic input namespace."""

_node_class = orm.WorkflowNode

@classmethod
def define(cls, spec):
super().define(spec)
spec.inputs.dynamic = True


@pytest.mark.usefixtures('clear_database_before_test')
def test_input_validation_storable_nodes():
"""Test that validation catches non-storable inputs even if nested in dictionary for dynamic namespace.
Regression test for #5128.
"""
with pytest.raises(ValueError):
run(TestValidateDynamicNamespaceProcess, **{'namespace': {'a': 1}})
Loading

0 comments on commit 5c1eb3f

Please sign in to comment.