From 8039ad9147d2a4bc61bc86845a1c82c2fd484eae Mon Sep 17 00:00:00 2001
From: Daniel Hollas
Date: Thu, 6 Feb 2025 14:02:16 +0000
Subject: [PATCH 1/4] Fix verdi devel check-undesired-imports when tui extra is installed (#6693)

`verdi devel check-undesired-imports` makes sure that we don't have heavy imports during verdi startup, to keep the CLI snappy (especially for tab completion). One of the expensive modules that it checks for is `asyncio`.

Unfortunately, when you install the `tui` extra, the trogon package that powers the TUI functionality seems to use asyncio, and this makes the test fail. (Indeed, one of the TUI's current downsides is that it makes all CLI interactions slower, even if you don't use the TUI subcommand.)

This PR makes the test smarter: it skips the check for the `asyncio` import when the `tui` extra is installed.
---
 .github/workflows/ci-code.yml           |  2 --
 src/aiida/cmdline/commands/cmd_devel.py | 12 ++++++++----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci-code.yml b/.github/workflows/ci-code.yml
index 65bb5a0a19..77ecdc7a9c 100644
--- a/.github/workflows/ci-code.yml
+++ b/.github/workflows/ci-code.yml
@@ -126,8 +126,6 @@ jobs:
       with:
         python-version: '3.12'
         from-lock: 'true'
-        # NOTE: The `verdi devel check-undesired-imports` fails if
-        # the 'tui' extra is installed.
         extras: ''
 
     - name: Run verdi tests
diff --git a/src/aiida/cmdline/commands/cmd_devel.py b/src/aiida/cmdline/commands/cmd_devel.py
index f8c89d14c1..b311e5706e 100644
--- a/src/aiida/cmdline/commands/cmd_devel.py
+++ b/src/aiida/cmdline/commands/cmd_devel.py
@@ -61,12 +61,11 @@ def devel_check_load_time():
 def devel_check_undesired_imports():
     """Check that verdi does not import python modules it shouldn't.
 
-    Note: The blacklist was taken from the list of packages in the 'atomic_tools' extra but can be extended.
+    This is to keep the verdi CLI snappy, especially for tab-completion.
     """
     loaded_modules = 0
-    for modulename in [
-        'asyncio',
+    unwanted_modules = [
         'requests',
         'plumpy',
         'disk_objectstore',
@@ -78,7 +77,12 @@ def devel_check_undesired_imports():
         'spglib',
         'pymysql',
         'yaml',
-    ]:
+    ]
+    # trogon powers the optional TUI and uses asyncio.
+    # Only check for asyncio when the optional 'tui' extra is not installed.
+    if 'trogon' not in sys.modules:
+        unwanted_modules += ['asyncio']
+    for modulename in unwanted_modules:
         if modulename in sys.modules:
             echo.echo_warning(f'Detected loaded module "{modulename}"')
             loaded_modules += 1

From 8c5c709bece13c56af1d91384957e7ccb8dd320b Mon Sep 17 00:00:00 2001
From: Zisen Liu <29354199+rabbull@users.noreply.github.com>
Date: Fri, 7 Feb 2025 10:58:16 +0100
Subject: [PATCH 2/4] CI: Ignore mamba Warning Message on stderr to Prevent JSON Parsing Errors (#6748)

This commit addresses a CI failure caused by mamba's deprecation warning for `--no-python-version-warning`: the warning is printed to `stderr` and interferes with JSON parsing in the tests. To resolve this:

+ Upgraded `pytest-docker` to version 3.2.0, which introduces the `ignore_stderr` feature.
+ Updated the test cases (`test_correct_python_version_installed` and `test_correct_pgsql_version_installed`) to pass `ignore_stderr=True`, so that warning messages on `stderr` do not disrupt JSON parsing.

This change also makes the tests robust against future JSON parsing issues caused by warnings on `stderr`.
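For illustration, the failure mode comes down to which stream gets parsed. A minimal stdlib sketch of the idea (not the pytest-docker API; `exec_json` is a hypothetical helper):

```python
import json
import subprocess


def exec_json(command: list[str]) -> list:
    """Run a command and parse its stdout as JSON (hypothetical helper).

    capture_output=True keeps stdout and stderr separate, so a deprecation
    warning printed to stderr cannot corrupt the JSON document on stdout.
    """
    result = subprocess.run(command, capture_output=True, text=True, check=True)
    # Parse only stdout; anything written to stderr is ignored.
    return json.loads(result.stdout)


# e.g. exec_json(['mamba', 'list', '--json', '--full-name', 'python'])
```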
---
 .docker/requirements.txt    | 2 +-
 .docker/tests/test_aiida.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.docker/requirements.txt b/.docker/requirements.txt
index 6c84d9fa30..802ffd4d8c 100644
--- a/.docker/requirements.txt
+++ b/.docker/requirements.txt
@@ -1,4 +1,4 @@
 docker~=7.0.0
 pytest~=8.2.0
 requests~=2.32.0
-pytest-docker~=3.1.0
+pytest-docker~=3.2.0
diff --git a/.docker/tests/test_aiida.py b/.docker/tests/test_aiida.py
index 7f952bd855..37040e3c06 100644
--- a/.docker/tests/test_aiida.py
+++ b/.docker/tests/test_aiida.py
@@ -6,7 +6,7 @@
 
 
 def test_correct_python_version_installed(aiida_exec, python_version):
-    info = json.loads(aiida_exec('mamba list --json --full-name python').decode())[0]
+    info = json.loads(aiida_exec('mamba list --json --full-name python', ignore_stderr=True).decode())[0]
 
     assert info['name'] == 'python'
     assert parse(info['version']) == parse(python_version)
@@ -15,7 +15,7 @@ def test_correct_pgsql_version_installed(aiida_exec, pgsql_version, variant):
     if variant == 'aiida-core-base':
         pytest.skip('PostgreSQL is not installed in the base image')
 
-    info = json.loads(aiida_exec('mamba list --json --full-name postgresql').decode())[0]
+    info = json.loads(aiida_exec('mamba list --json --full-name postgresql', ignore_stderr=True).decode())[0]
 
     assert info['name'] == 'postgresql'
     assert parse(info['version']).major == parse(pgsql_version).major

From f56fcc31c70e3bd6a1422eb6cfee3f5fab5eaac4 Mon Sep 17 00:00:00 2001
From: ayushjariyal <148481273+ayushjariyal@users.noreply.github.com>
Date: Mon, 10 Feb 2025 12:45:59 +0530
Subject: [PATCH 3/4] `Transport`: Bug fix in `rename` method (#6735)

The `rename()` method takes two arguments, `oldpath` and `newpath`. Previously, it raised an `OSError` when `newpath` did *not* exist before renaming, which is the opposite of the intended behaviour: the destination of a rename should not exist yet. This bug is fixed in both `ssh.py` and `local.py`, which now raise an `OSError` when `newpath` already exists. An additional test has been added to `test_all_plugins.py` to verify the correct behaviour.
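To make the fixed contract concrete, here is a minimal sketch of the intended semantics (illustrative only; `transport` stands for any open `Transport` instance and `tmp_path` for a writable directory):

```python
import pytest


def check_rename_contract(transport, tmp_path):
    """Illustrative sketch: rename succeeds onto a fresh path, fails onto an existing one."""
    old = tmp_path / 'old.txt'
    new = tmp_path / 'new.txt'
    old.touch()

    # The destination does not exist yet: rename must succeed.
    transport.rename(old, new)

    # The destination now exists: renaming onto it must raise OSError.
    old.touch()
    with pytest.raises(OSError):
        transport.rename(old, new)
```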
--- src/aiida/transports/plugins/local.py | 6 +++--- src/aiida/transports/plugins/ssh.py | 14 +++++--------- tests/transports/test_all_plugins.py | 25 +++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/aiida/transports/plugins/local.py b/src/aiida/transports/plugins/local.py index 937c499861..c0915cf84b 100644 --- a/src/aiida/transports/plugins/local.py +++ b/src/aiida/transports/plugins/local.py @@ -866,7 +866,7 @@ def rename(self, oldpath: TransportPath, newpath: TransportPath): :param str oldpath: existing name of the file or folder :param str newpath: new name for the file or folder - :raises OSError: if src/dst is not found + :raises OSError: if oldpath is not found or newpath already exists :raises ValueError: if src/dst is not a valid string """ oldpath = str(oldpath) @@ -877,8 +877,8 @@ def rename(self, oldpath: TransportPath, newpath: TransportPath): raise ValueError(f'Destination {newpath} is not a valid string') if not os.path.exists(oldpath): raise OSError(f'Source {oldpath} does not exist') - if not os.path.exists(newpath): - raise OSError(f'Destination {newpath} does not exist') + if os.path.exists(newpath): + raise OSError(f'Destination {newpath} already exists.') shutil.move(oldpath, newpath) diff --git a/src/aiida/transports/plugins/ssh.py b/src/aiida/transports/plugins/ssh.py index f5d0e7053f..fb5d7dec79 100644 --- a/src/aiida/transports/plugins/ssh.py +++ b/src/aiida/transports/plugins/ssh.py @@ -1357,8 +1357,8 @@ def rename(self, oldpath: TransportPath, newpath: TransportPath): :param str oldpath: existing name of the file or folder :param str newpath: new name for the file or folder - :raises OSError: if oldpath/newpath is not found - :raises ValueError: if sroldpathc/newpath is not a valid path + :raises OSError: if oldpath is not found or newpath already exists + :raises ValueError: if oldpath/newpath is not a valid path """ if not oldpath: raise ValueError(f'Source {oldpath} is not a valid path') @@ -1371,13 +1371,9 @@ def rename(self, oldpath: TransportPath, newpath: TransportPath): if not self.isfile(oldpath): if not self.isdir(oldpath): raise OSError(f'Source {oldpath} does not exist') - # TODO: this seems to be a bug (?) - # why to raise an OSError if the newpath does not exist? - # ofcourse newpath shouldn't exist, that's why we are renaming it! 
-        # issue opened here: https://github.com/aiidateam/aiida-core/issues/6725
-        if not self.isfile(newpath):
-            if not self.isdir(newpath):
-                raise OSError(f'Destination {newpath} does not exist')
+
+        if self.path_exists(newpath):
+            raise OSError(f'Destination {newpath} already exists')
 
         return self.sftp.rename(oldpath, newpath)
 
diff --git a/tests/transports/test_all_plugins.py b/tests/transports/test_all_plugins.py
index 1dcd9c6957..805058ce6d 100644
--- a/tests/transports/test_all_plugins.py
+++ b/tests/transports/test_all_plugins.py
@@ -1232,3 +1232,28 @@ def test_asynchronous_execution(custom_transport, tmp_path):
         except ProcessLookupError:
             # If the process is already dead (or has never run), I just ignore the error
             pass
+
+
+def test_rename(custom_transport, tmp_path_remote):
+    """Test the rename function of the transport plugin."""
+    with custom_transport as transport:
+        old_file = tmp_path_remote / 'oldfile.txt'
+        new_file = tmp_path_remote / 'newfile.txt'
+        another_file = tmp_path_remote / 'anotherfile.txt'
+
+        # Create a test file to rename
+        old_file.touch()
+        another_file.touch()
+        assert old_file.exists()
+        assert another_file.exists()
+
+        # Perform rename operation
+        transport.rename(old_file, new_file)
+
+        # Verify rename was successful
+        assert not old_file.exists()
+        assert new_file.exists()
+
+        # Renaming onto an existing path must raise OSError
+        with pytest.raises(OSError, match='already exist|destination exists'):
+            transport.rename(new_file, another_file)

From d2fbf214ad2fcfe5e39f9ebe2982f05557196397 Mon Sep 17 00:00:00 2001
From: Zisen Liu <29354199+rabbull@users.noreply.github.com>
Date: Mon, 10 Feb 2025 16:37:48 +0100
Subject: [PATCH 4/4] ORM: Use `skip_orm` as the default implementation for `SqlaGroup.add_nodes` and `SqlaGroup.remove_nodes` (#6720)

This commit removes the `skip_orm` flag and adopts the optimized approach behind it as the default behavior for the `add_nodes` and `remove_nodes` methods.

As discussed in #5453, the ORM-based implementations of `add_nodes` and `remove_nodes` were found to be inefficient. To address this, the `skip_orm` flag was introduced: it enabled a faster approach based on the SQLAlchemy core API, but was never well documented or advertised.

All existing tests pass with the new implementation and are sufficiently comprehensive to ensure its safety. Consequently, it is reasonable to make the `skip_orm` implementation the default for bulk insertion and removal of nodes within a group.
---
 src/aiida/storage/psql_dos/orm/groups.py | 74 +++++------------------
 tests/orm/implementation/test_groups.py  | 63 --------------------
 tests/orm/test_groups.py                 | 25 +++++++-
 3 files changed, 40 insertions(+), 122 deletions(-)

diff --git a/src/aiida/storage/psql_dos/orm/groups.py b/src/aiida/storage/psql_dos/orm/groups.py
index ee82dc24ec..c030e74123 100644
--- a/src/aiida/storage/psql_dos/orm/groups.py
+++ b/src/aiida/storage/psql_dos/orm/groups.py
@@ -167,17 +167,10 @@ def add_nodes(self, nodes, **kwargs):
         :note: all the nodes *and* the group itself have to be stored.
 
         :param nodes: a list of `BackendNode` instance to be added to this group
-
-        :param kwargs:
-            skip_orm: When the flag is on, the SQLA ORM is skipped and SQLA is used
-            to create a direct SQL INSERT statement to the group-node relationship
-            table (to improve speed).
""" from sqlalchemy.dialects.postgresql import insert - from sqlalchemy.exc import IntegrityError super().add_nodes(nodes) - skip_orm = kwargs.get('skip_orm', False) def check_node(given_node): """Check if given node is of correct type and stored""" @@ -188,31 +181,16 @@ def check_node(given_node): raise ValueError('At least one of the provided nodes is unstored, stopping...') with utils.disable_expire_on_commit(self.backend.get_session()) as session: - if not skip_orm: - # Get dbnodes here ONCE, otherwise each call to dbnodes will re-read the current value in the database - dbnodes = self.model.dbnodes - - for node in nodes: - check_node(node) - - # Use pattern as suggested here: - # http://docs.sqlalchemy.org/en/latest/orm/session_transaction.html#using-savepoint - try: - with session.begin_nested(): - dbnodes.append(node.bare_model) - session.flush() - except IntegrityError: - # Duplicate entry, skip - pass - else: - ins_dict = [] - for node in nodes: - check_node(node) - ins_dict.append({'dbnode_id': node.id, 'dbgroup_id': self.id}) - - table = self.GROUP_NODE_CLASS.__table__ - ins = insert(table).values(ins_dict) - session.execute(ins.on_conflict_do_nothing(index_elements=['dbnode_id', 'dbgroup_id'])) + ins_dict = [] + for node in nodes: + check_node(node) + ins_dict.append({'dbnode_id': node.id, 'dbgroup_id': self.id}) + if len(ins_dict) == 0: + return + + table = self.GROUP_NODE_CLASS.__table__ + ins = insert(table).values(ins_dict) + session.execute(ins.on_conflict_do_nothing(index_elements=['dbnode_id', 'dbgroup_id'])) # Commit everything as up till now we've just flushed if not session.in_nested_transaction(): @@ -224,18 +202,11 @@ def remove_nodes(self, nodes, **kwargs): :note: all the nodes *and* the group itself have to be stored. :param nodes: a list of `BackendNode` instance to be added to this group - :param kwargs: - skip_orm: When the flag is set to `True`, the SQLA ORM is skipped and SQLA is used to create a direct SQL - DELETE statement to the group-node relationship table in order to improve speed. 
""" from sqlalchemy import and_ super().remove_nodes(nodes) - # Get dbnodes here ONCE, otherwise each call to dbnodes will re-read the current value in the database - dbnodes = self.model.dbnodes - skip_orm = kwargs.get('skip_orm', False) - def check_node(node): if not isinstance(node, self.NODE_CLASS): raise TypeError(f'invalid type {type(node)}, has to be {self.NODE_CLASS}') @@ -243,26 +214,13 @@ def check_node(node): if node.id is None: raise ValueError('At least one of the provided nodes is unstored, stopping...') - list_nodes = [] - with utils.disable_expire_on_commit(self.backend.get_session()) as session: - if not skip_orm: - for node in nodes: - check_node(node) - - # Check first, if SqlA issues a DELETE statement for an unexisting key it will result in an error - if node.bare_model in dbnodes: - list_nodes.append(node.bare_model) - - for node in list_nodes: - dbnodes.remove(node) - else: - table = self.GROUP_NODE_CLASS.__table__ - for node in nodes: - check_node(node) - clause = and_(table.c.dbnode_id == node.id, table.c.dbgroup_id == self.id) - statement = table.delete().where(clause) - session.execute(statement) + table = self.GROUP_NODE_CLASS.__table__ + for node in nodes: + check_node(node) + clause = and_(table.c.dbnode_id == node.id, table.c.dbgroup_id == self.id) + statement = table.delete().where(clause) + session.execute(statement) if not session.in_nested_transaction(): session.commit() diff --git a/tests/orm/implementation/test_groups.py b/tests/orm/implementation/test_groups.py index 199d363e25..19a6cf9d70 100644 --- a/tests/orm/implementation/test_groups.py +++ b/tests/orm/implementation/test_groups.py @@ -25,66 +25,3 @@ def test_creation_from_dbgroup(backend): assert group.pk == gcopy.pk assert group.uuid == gcopy.uuid - - -def test_add_nodes_skip_orm(): - """Test the `SqlaGroup.add_nodes` method with the `skip_orm=True` flag.""" - group = orm.Group(label='test_adding_nodes').store().backend_entity - - node_01 = orm.Data().store().backend_entity - node_02 = orm.Data().store().backend_entity - node_03 = orm.Data().store().backend_entity - node_04 = orm.Data().store().backend_entity - node_05 = orm.Data().store().backend_entity - nodes = [node_01, node_02, node_03, node_04, node_05] - - group.add_nodes([node_01], skip_orm=True) - group.add_nodes([node_02, node_03], skip_orm=True) - group.add_nodes((node_04, node_05), skip_orm=True) - - assert set(_.pk for _ in nodes) == set(_.pk for _ in group.nodes) - - # Try to add a node that is already present: there should be no problem - group.add_nodes([node_01], skip_orm=True) - assert set(_.pk for _ in nodes) == set(_.pk for _ in group.nodes) - - -def test_add_nodes_skip_orm_batch(): - """Test the `SqlaGroup.add_nodes` method with the `skip_orm=True` flag and batches.""" - nodes = [orm.Data().store().backend_entity for _ in range(100)] - - # Add nodes to groups using different batch size. Check in the end the correct addition. 
-    batch_sizes = (1, 3, 10, 1000)
-    for batch_size in batch_sizes:
-        group = orm.Group(label=f'test_batches_{batch_size!s}').store()
-        group.backend_entity.add_nodes(nodes, skip_orm=True, batch_size=batch_size)
-        assert set(_.pk for _ in nodes) == set(_.pk for _ in group.nodes)
-
-
-def test_remove_nodes_bulk():
-    """Test node removal with `skip_orm=True`."""
-    group = orm.Group(label='test_removing_nodes').store().backend_entity
-
-    node_01 = orm.Data().store().backend_entity
-    node_02 = orm.Data().store().backend_entity
-    node_03 = orm.Data().store().backend_entity
-    node_04 = orm.Data().store().backend_entity
-    nodes = [node_01, node_02, node_03]
-
-    group.add_nodes(nodes)
-    assert set(_.pk for _ in nodes) == set(_.pk for _ in group.nodes)
-
-    # Remove a node that is not in the group: nothing should happen
-    group.remove_nodes([node_04], skip_orm=True)
-    assert set(_.pk for _ in nodes) == set(_.pk for _ in group.nodes)
-
-    # Remove one Node
-    nodes.remove(node_03)
-    group.remove_nodes([node_03], skip_orm=True)
-    assert set(_.pk for _ in nodes) == set(_.pk for _ in group.nodes)
-
-    # Remove a list of Nodes and check
-    nodes.remove(node_01)
-    nodes.remove(node_02)
-    group.remove_nodes([node_01, node_02], skip_orm=True)
-    assert set(_.pk for _ in nodes) == set(_.pk for _ in group.nodes)
diff --git a/tests/orm/test_groups.py b/tests/orm/test_groups.py
index c62f903400..3e0c592c9f 100644
--- a/tests/orm/test_groups.py
+++ b/tests/orm/test_groups.py
@@ -149,13 +149,27 @@ def test_add_nodes(self):
         group.add_nodes(node_01)
         assert set(_.pk for _ in nodes) == set(_.pk for _ in group.nodes)
 
+        # Try to add nothing: there should be no problem
+        group.add_nodes([])
+        assert set(_.pk for _ in nodes) == set(_.pk for _ in group.nodes)
+
+        nodes = [orm.Data().store().backend_entity for _ in range(100)]
+
+        # Add nodes to groups using different batch sizes and check at the end that all nodes were added.
+        batch_sizes = (1, 3, 10, 1000)
+        for batch_size in batch_sizes:
+            group = orm.Group(label=f'test_batches_{batch_size!s}').store()
+            group.backend_entity.add_nodes(nodes, batch_size=batch_size)
+            assert set(_.pk for _ in nodes) == set(_.pk for _ in group.nodes)
+
     def test_remove_nodes(self):
         """Test node removal."""
         node_01 = orm.Data().store()
         node_02 = orm.Data().store()
         node_03 = orm.Data().store()
         node_04 = orm.Data().store()
-        nodes = [node_01, node_02, node_03]
+        node_05 = orm.Data().store()
+        nodes = [node_01, node_02, node_03, node_05]
         group = orm.Group(label=uuid.uuid4().hex).store()
 
         # Add initial nodes
@@ -177,6 +191,15 @@ def test_remove_nodes(self):
         group.remove_nodes([node_01, node_02])
         assert set(_.pk for _ in nodes) == set(_.pk for _ in group.nodes)
 
+        # Remove to empty
+        nodes.remove(node_05)
+        group.remove_nodes([node_05])
+        assert set(_.pk for _ in nodes) == set(_.pk for _ in group.nodes)
+
+        # Try to remove nothing: there should be no problem
+        group.remove_nodes([])
+        assert set(_.pk for _ in nodes) == set(_.pk for _ in group.nodes)
+
     def test_clear(self):
         """Test the `clear` method to remove all nodes."""
         node_01 = orm.Data().store()
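
As a closing illustration of the bulk-insert pattern that `add_nodes` now uses by default, here is a self-contained sketch. The table below is a stand-in for the group-node association table (in aiida-core the real table is obtained as `self.GROUP_NODE_CLASS.__table__`), so names here are assumptions for illustration:

```python
from sqlalchemy import Column, Integer, MetaData, Table
from sqlalchemy.dialects.postgresql import insert

metadata = MetaData()

# Stand-in for the group-node association table used by the psql_dos backend.
group_node_table = Table(
    'db_dbgroup_dbnodes',
    metadata,
    Column('dbnode_id', Integer, primary_key=True),
    Column('dbgroup_id', Integer, primary_key=True),
)


def build_add_nodes_statement(group_id: int, node_ids: list[int]):
    """Build one bulk INSERT ... ON CONFLICT DO NOTHING statement.

    Assumes node_ids is non-empty (the patched add_nodes returns early
    for an empty list).
    """
    rows = [{'dbnode_id': node_id, 'dbgroup_id': group_id} for node_id in node_ids]
    # Rows that already exist are skipped in a single round trip, replacing
    # the per-node savepoint/IntegrityError handling of the old ORM path.
    return insert(group_node_table).values(rows).on_conflict_do_nothing(
        index_elements=['dbnode_id', 'dbgroup_id']
    )
```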