diff --git a/CHANGELOG.md b/CHANGELOG.md index 42510301a..fe743c744 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ # Changelog +- 2026-04-10: Removing analysis modules downstream when restarting - Issue #1495 - 2026-04-08: Improved docstring in modules - 2026-04-08: Increased NOE restraints array size in scoring modules - Issue #1501 - 2026-03-09: Automated type casting for optional argument seed in haddock3-restraints random_removal - Issue #1485 diff --git a/src/haddock/clis/cli_analyse.py b/src/haddock/clis/cli_analyse.py index 6ec4779a0..8a61f6c07 100644 --- a/src/haddock/clis/cli_analyse.py +++ b/src/haddock/clis/cli_analyse.py @@ -30,7 +30,7 @@ from haddock import log from haddock.clis.cli_unpack import main as haddock3_unpack from haddock.clis.cli_clean import main as haddock3_clean -from haddock.core.defaults import INTERACTIVE_RE_SUFFIX +from haddock.core.defaults import INTERACTIVE_RE_SUFFIX, ANA_FOLDER from haddock.core.typing import ( Any, ArgumentParser, @@ -63,7 +63,6 @@ from haddock.modules.analysis.caprieval import HaddockModule -ANA_FOLDER = "analysis" # name of the analysis folder INTER_STR = INTERACTIVE_RE_SUFFIX # suffix of interactive analysis folders @@ -758,9 +757,15 @@ def main( # get the module folders from the run_dir input sel_steps = get_module_steps_folders(Path("./"), modules) if inter: - sel_steps = [st for st in sel_steps if st.endswith(INTER_STR)] + sel_steps = [ + st for st in sel_steps + if st.endswith(INTERACTIVE_RE_SUFFIX) + ] else: - sel_steps = [st for st in sel_steps if not st.endswith(INTER_STR)] + sel_steps = [ + st for st in sel_steps + if not st.endswith(INTERACTIVE_RE_SUFFIX) + ] log.info(f"selected steps: {', '.join(sel_steps)}") # analysis diff --git a/src/haddock/clis/cli_traceback.py b/src/haddock/clis/cli_traceback.py index 584f44756..118b71780 100644 --- a/src/haddock/clis/cli_traceback.py +++ b/src/haddock/clis/cli_traceback.py @@ -17,6 +17,7 @@ import pandas as pd from haddock import log +from 
haddock.core.defaults import TRACEBACK_FOLDER from haddock.core.typing import Any, FilePath from haddock.libs import libcli from haddock.libs.libontology import ModuleIO, PDBFile @@ -24,9 +25,6 @@ from haddock.modules import get_module_steps_folders -TRACK_FOLDER = "traceback" # name of the traceback folder - - def get_steps_without_pdbs(run_dir, all_steps): """ Get the modules that do not produce PDB files. @@ -270,7 +268,7 @@ def main(run_dir: FilePath, offline: bool = False) -> None: log.info(f"Steps to trace back: {', '.join(sel_step)}") # creating traceback folder - outdir = Path(run_dir, TRACK_FOLDER) + outdir = Path(run_dir, TRACEBACK_FOLDER) try: outdir.mkdir(exist_ok=False) log.info(f"Created directory: {str(outdir.resolve())}") @@ -350,18 +348,18 @@ def main(run_dir: FilePath, offline: bool = False) -> None: # ordering the dataframe df_output = order_traceback_df(df_output, sel_step) # dumping the dataframe - track_filename = Path(run_dir, TRACK_FOLDER, "traceback.tsv") + track_filename = Path(run_dir, TRACEBACK_FOLDER, "traceback.tsv") log.info( f"Output dataframe {track_filename} " f"created with shape {df_output.shape}" ) df_output.to_csv(track_filename, sep="\t", index=False) # taking (and writing) a subset of the dataframe - consensus_filename = Path(run_dir, TRACK_FOLDER, "consensus.tsv") + consensus_filename = Path(run_dir, TRACEBACK_FOLDER, "consensus.tsv") rank_data_subset = subset_traceback(df_output, consensus_filename) # plotting the traceback dataframe - plot_filename = Path(run_dir, TRACK_FOLDER, "traceback.html") + plot_filename = Path(run_dir, TRACEBACK_FOLDER, "traceback.html") make_traceback_plot(rank_data_subset, plot_filename, offline=offline) return diff --git a/src/haddock/core/defaults.py b/src/haddock/core/defaults.py index a2b504d4b..174f2feda 100644 --- a/src/haddock/core/defaults.py +++ b/src/haddock/core/defaults.py @@ -34,6 +34,12 @@ INTERACTIVE_RE_SUFFIX = "interactive" """Suffix added to interactive haddock3-re runs.""" 
+ANA_FOLDER = "analysis" +"""Name of the analysis folder.""" + +TRACEBACK_FOLDER = "traceback" +"""Name of the traceback folder.""" + MODULE_DEFAULT_YAML = "defaults.yaml" """Default name of the yaml default parameters file.""" diff --git a/src/haddock/gear/prepare_run.py b/src/haddock/gear/prepare_run.py index 8f1ee274b..d07c5ac09 100644 --- a/src/haddock/gear/prepare_run.py +++ b/src/haddock/gear/prepare_run.py @@ -6,9 +6,7 @@ import json import os import shutil -import string import sys -import tarfile from contextlib import contextmanager, suppress from copy import copy, deepcopy from functools import lru_cache, wraps @@ -59,7 +57,7 @@ config_optional_general_parameters_dict, ) from haddock.gear.preprocessing import process_pdbs, read_additional_residues -from haddock.gear.restart_run import remove_folders_after_number +from haddock.gear.restart_run import preprocess_restart_from from haddock.gear.validations import ( v_rundir, validate_defaults_yaml, @@ -330,9 +328,7 @@ def setup_run( check_mandatory_argments_are_present(general_params) if restarting_from: - remove_folders_after_number(general_params[RUNDIR], restart_from) - _data_dir = Path(general_params[RUNDIR], "data") - remove_folders_after_number(_data_dir, restart_from) + preprocess_restart_from(general_params[RUNDIR], restart_from) if restarting_from or starting_from_copy: # get run files in folder @@ -397,7 +393,7 @@ def setup_run( enhanced_haddock_params = deepcopy(general_params) enhanced_haddock_params.update(modules_params) config_files["enhanced_haddock_params"] = enhanced_haddock_params - config_saves = save_configuration_files(config_files, data_dir) # noqa : F841 + _config_saves = save_configuration_files(config_files, data_dir) # noqa : F841 if scratch_rest0: copy_molecules_to_data_dir( diff --git a/src/haddock/gear/restart_run.py b/src/haddock/gear/restart_run.py index a973bc960..0f1a54971 100644 --- a/src/haddock/gear/restart_run.py +++ b/src/haddock/gear/restart_run.py @@ -2,7 +2,9 @@ from 
def preprocess_restart_from(rundir: "str | Path", restart_from: int) -> None:
    """Remove all folders and files that are downstream of restart index.

    Cleans the module step folders of the run directory, their
    counterparts in the ``data`` sub-directory and in the analysis
    sub-directory, then deletes the traceback directory as a whole.

    Parameters
    ----------
    rundir : str or Path
        Workflow run directory.
    restart_from : int
        Index of the module to restart from. Step folders whose numeric
        prefix is greater than or equal to this index are removed.
    """
    run_path = Path(rundir)
    # Remove module output folders from the restart index onwards
    remove_folders_after_number(run_path, restart_from)
    # Remove the matching per-module folders stored under `data`
    remove_folders_after_number(Path(run_path, "data"), restart_from)
    # NOTE(review): the analysis folder may be absent from a run; how the
    # folder listing behaves on a missing directory is not visible from
    # here, so only clean it when it actually exists.
    analysis_dir = Path(run_path, ANA_FOLDER)
    if analysis_dir.is_dir():
        remove_folders_after_number(analysis_dir, restart_from)
    # The traceback directory is not split per module, so it is dropped
    # entirely; `ignore_errors` makes a missing directory a no-op.
    rmtree(Path(run_path, TRACEBACK_FOLDER), ignore_errors=True)
def test_preprocess_restart_from():
    """Test removal of downstream directories when restarting a run.

    Builds a fake three-module run (with `data`, analysis and traceback
    folders), restarts from index 2 and checks that everything belonging
    to that index is gone while earlier steps are left untouched.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        rundir = Path(tmpdir)
        # Build mimic of previous run directories
        module_dirs = ["topoaa", "rigidbody", "caprieval"]
        for module_index, module_name in enumerate(module_dirs):
            step = f"{module_index}_{module_name}"
            Path(rundir, "data", step).mkdir(parents=True)
            Path(rundir, step).mkdir(parents=True)
            if module_name == "caprieval":
                analysis_step = f"{step}_analysis"
                Path(rundir, ANA_FOLDER, analysis_step).mkdir(parents=True)
        Path(rundir, TRACEBACK_FOLDER).mkdir()

        # Test restart preprocessing function
        restart_run.preprocess_restart_from(rundir, 2)

        # Everything attached to module index 2 must be gone, including
        # its `data` counterpart, as well as the whole traceback folder.
        expected_to_be_removed = [
            Path(rundir, TRACEBACK_FOLDER),
            Path(rundir, ANA_FOLDER, "2_caprieval_analysis"),
            Path(rundir, "data", "2_caprieval"),
            Path(rundir, "2_caprieval"),
        ]
        # Upstream modules and the container folders must be preserved.
        expected_to_stay_there = [
            Path(rundir, "1_rigidbody"),
            Path(rundir, "data", "1_rigidbody"),
            Path(rundir, "0_topoaa"),
            Path(rundir, "data", "0_topoaa"),
            Path(rundir, "data"),
            Path(rundir, ANA_FOLDER),
        ]
        for should_not_exist in expected_to_be_removed:
            assert not should_not_exist.exists(), (
                f"{should_not_exist} should have been removed"
            )
        for should_exist in expected_to_stay_there:
            assert should_exist.exists(), (
                f"{should_exist} should have been kept"
            )