CLI: add option --clean-workdir to verdi node delete #6756

Open
wants to merge 4 commits into base: main
17 changes: 2 additions & 15 deletions src/aiida/cmdline/commands/cmd_calcjob.py
@@ -259,8 +259,7 @@ def calcjob_cleanworkdir(calcjobs, past_days, older_than, computers, force, exit
If both are specified, a logical AND is done between the two, i.e. the calcjobs that will be cleaned have been
modified AFTER [-p option] days from now, but BEFORE [-o option] days from now.
"""
from aiida import orm
from aiida.orm.utils.remote import get_calcjob_remote_paths
from aiida.orm.utils.remote import clean_mapping_remote_paths, get_calcjob_remote_paths

if calcjobs:
if past_days is not None and older_than is not None:
@@ -286,19 +285,7 @@ def calcjob_cleanworkdir(calcjobs, past_days, older_than, computers, force, exit
warning = f'Are you sure you want to clean the work directory of {path_count} calcjobs?'
click.confirm(warning, abort=True)

user = orm.User.collection.get_default()

for computer_uuid, paths in path_mapping.items():
counter = 0
computer = orm.load_computer(uuid=computer_uuid)
transport = orm.AuthInfo.collection.get(dbcomputer_id=computer.pk, aiidauser_id=user.pk).get_transport()

with transport:
for remote_folder in paths:
remote_folder._clean(transport=transport)
counter += 1

echo.echo_success(f'{counter} remote folders cleaned on {computer.label}')
clean_mapping_remote_paths(path_mapping)


def get_remote_and_path(calcjob, path=None):
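For reference, a quick illustration of how the two time filters described in the docstring combine. The -p and -o flags are taken from the docstring above; the values are purely illustrative:

# Clean the work directories of calcjobs modified within the last 30 days
# but more than 7 days ago (the two filters combine with a logical AND)
verdi calcjob cleanworkdir -p 30 -o 7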
65 changes: 61 additions & 4 deletions src/aiida/cmdline/commands/cmd_node.py
@@ -328,9 +328,14 @@ def extras(nodes, keys, fmt, identifier, raw):
@click.argument('identifier', nargs=-1, metavar='NODES')
@options.DRY_RUN()
@options.FORCE()
@click.option(
'--clean-workdir',
is_flag=True,
unkcpz (Member) commented on Feb 15, 2025:
Putting the default to True would be backward incompatible and potentially dangerous.

PR author (Contributor) replied:
Well, that's exactly what this command does :-)
It defines it as a flag with is_flag=True, so the default value is False.

Member replied:
Ah, right. Then could you write default=False to be more explicit?
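For clarity, the explicit form suggested in the thread would look roughly like this. This is an illustrative sketch only, not the merged diff; with Click, is_flag=True already implies default=False:

@click.option(
    '--clean-workdir',
    is_flag=True,
    default=False,
    help='Also clean the remote work directory, if applicable.',
)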

help='Also clean the remote work directory, if applicable.',
)
@options.graph_traversal_rules(GraphTraversalRules.DELETE.value)
@with_dbenv()
def node_delete(identifier, dry_run, force, **traversal_rules):
def node_delete(identifier, dry_run, force, clean_workdir, **traversal_rules):
"""Delete nodes from the provenance graph.

This will not only delete the nodes explicitly provided via the command line, but will also include
@@ -356,10 +361,62 @@ def _dry_run_callback(pks):
echo.echo_info('The nodes with the following pks would be deleted: ' + ' '.join(map(str, pks)))
return not click.confirm('Shall I continue?', abort=True)

_, was_deleted = delete_nodes(pks, dry_run=dry_run or _dry_run_callback, **traversal_rules)
def _perform_delete():
_, was_deleted = delete_nodes(pks, dry_run=dry_run or _dry_run_callback, **traversal_rules)
if was_deleted:
echo.echo_success('Finished deletion.')

if clean_workdir:
from aiida.manage import get_manager
from aiida.orm import CalcJobNode, QueryBuilder
from aiida.orm.utils.remote import clean_mapping_remote_paths, get_calcjob_remote_paths
from aiida.tools.graph.graph_traversers import get_nodes_delete

backend = get_manager().get_backend()
# Missing nodes are ignored here; they will be raised via ``delete_nodes`` in the next block
pks_set_to_delete = get_nodes_delete(
pks, get_links=False, missing_callback=lambda missing_pks: None, backend=backend, **traversal_rules
)['nodes']

qb = QueryBuilder()
qb.append(CalcJobNode, filters={'id': {'in': pks_set_to_delete}}, project='id')
calcjobs_pks = [result[0] for result in qb.all()]

if not calcjobs_pks:
echo.echo_report('--clean-workdir ignored. No CalcJobNode associated with the given nodes was found.')
Comment on lines +385 to +386 (Member):
I think you can do an early return here to avoid the nested if..else.
_perform_delete()
return

path_mapping = get_calcjob_remote_paths(
calcjobs_pks,
only_not_cleaned=True,
)

if not path_mapping:
echo.echo_report('--clean-workdir ignored. CalcJobNode work directories are already cleaned.')
_perform_delete()
return

descendant_pks = [remote_folder.pk for paths in path_mapping.values() for remote_folder in paths]

if not force and not dry_run:
echo.echo_warning(
f'YOU ARE ABOUT TO CLEAN {len(descendant_pks)} REMOTE DIRECTORIES! ' 'THIS CANNOT BE UNDONE!'
)
echo.echo_info(
'Remote directories of nodes with the following pks would be cleaned: '
+ ' '.join(map(str, descendant_pks))
)
click.confirm('Shall I continue?', abort=True)

if dry_run:
echo.echo_report(
'Remote folders of these nodes are marked for deletion: ' + ' '.join(map(str, descendant_pks))
)
else:
clean_mapping_remote_paths(path_mapping)

if was_deleted:
echo.echo_success('Finished deletion.')
_perform_delete()


@verdi_node.command('rehash')
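As a usage illustration of the new flag (the pk is hypothetical; --dry-run and --force come from the options declared in the diff above):

# Preview which nodes would be deleted and which remote folders would be cleaned
verdi node delete 1234 --clean-workdir --dry-run
# Delete the nodes and clean their remote work directories without confirmation prompts
verdi node delete 1234 --clean-workdir --force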
32 changes: 30 additions & 2 deletions src/aiida/orm/utils/remote.py
@@ -13,12 +13,13 @@
import os
import typing as t

from aiida import orm
from aiida.cmdline.utils import echo
from aiida.orm.nodes.data.remote.base import RemoteData

if t.TYPE_CHECKING:
from collections.abc import Sequence

from aiida import orm
from aiida.orm.implementation import StorageBackend
from aiida.transports import Transport

Expand All @@ -45,6 +46,34 @@
pass


def clean_mapping_remote_paths(path_mapping, silent=False):
"""Clean the remote folders for a given mapping of computer UUIDs to a list of remote folders.

:param path_mapping: a dictionary where the keys are the computer UUIDs and the values are lists of remote folders.
It is designed to accept the output of `get_calcjob_remote_paths`.
:param silent: if True, the `echo` output will be suppressed
"""

user = orm.User.collection.get_default()

if not user:
raise ValueError('No default user found')

Codecov (codecov/patch) check warning on line 61 in src/aiida/orm/utils/remote.py: added line #L61 was not covered by tests.

for computer_uuid, paths in path_mapping.items():
counter = 0
computer = orm.load_computer(uuid=computer_uuid)
transport = orm.AuthInfo.collection.get(dbcomputer_id=computer.pk, aiidauser_id=user.pk).get_transport()

with transport:
for remote_folder in paths:
remote_folder._clean(transport=transport)
counter += 1

if not silent:
echo.echo_success(f'{counter} remote folders cleaned on {computer.label}')


def get_calcjob_remote_paths(
pks: list[int] | None = None,
past_days: int | None = None,
@@ -70,7 +99,6 @@
"""
from datetime import timedelta

from aiida import orm
from aiida.common import timezone
from aiida.orm import CalcJobNode

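A minimal usage sketch of the new helper, mirroring how cmd_calcjob.py and cmd_node.py call it in this diff (assumes a loaded profile; the pks are hypothetical):

from aiida.orm.utils.remote import clean_mapping_remote_paths, get_calcjob_remote_paths

# Map computer UUIDs to the not-yet-cleaned RemoteData folders of the given calcjobs
path_mapping = get_calcjob_remote_paths([1234, 5678], only_not_cleaned=True)

if path_mapping:
    # Opens one transport per computer, cleans each remote folder,
    # and echoes a per-computer summary unless silent=True is passed
    clean_mapping_remote_paths(path_mapping)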