From b65bdcc903c1cc2c1a8f28ce3aa2b674a068c76f Mon Sep 17 00:00:00 2001 From: VGPReys Date: Fri, 10 Apr 2026 10:43:48 +0200 Subject: [PATCH 1/9] removing analysis steps on run restart from --- src/haddock/clis/cli_analyse.py | 13 +++++++++---- src/haddock/core/defaults.py | 3 +++ src/haddock/gear/prepare_run.py | 8 ++++---- src/haddock/gear/restart_run.py | 4 ++-- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/haddock/clis/cli_analyse.py b/src/haddock/clis/cli_analyse.py index 6ec4779a0a..8a61f6c07f 100644 --- a/src/haddock/clis/cli_analyse.py +++ b/src/haddock/clis/cli_analyse.py @@ -30,7 +30,7 @@ from haddock import log from haddock.clis.cli_unpack import main as haddock3_unpack from haddock.clis.cli_clean import main as haddock3_clean -from haddock.core.defaults import INTERACTIVE_RE_SUFFIX +from haddock.core.defaults import INTERACTIVE_RE_SUFFIX, ANA_FOLDER from haddock.core.typing import ( Any, ArgumentParser, @@ -63,7 +63,6 @@ from haddock.modules.analysis.caprieval import HaddockModule -ANA_FOLDER = "analysis" # name of the analysis folder INTER_STR = INTERACTIVE_RE_SUFFIX # suffix of interactive analysis folders @@ -758,9 +757,15 @@ def main( # get the module folders from the run_dir input sel_steps = get_module_steps_folders(Path("./"), modules) if inter: - sel_steps = [st for st in sel_steps if st.endswith(INTER_STR)] + sel_steps = [ + st for st in sel_steps + if st.endswith(INTERACTIVE_RE_SUFFIX) + ] else: - sel_steps = [st for st in sel_steps if not st.endswith(INTER_STR)] + sel_steps = [ + st for st in sel_steps + if not st.endswith(INTERACTIVE_RE_SUFFIX) + ] log.info(f"selected steps: {', '.join(sel_steps)}") # analysis diff --git a/src/haddock/core/defaults.py b/src/haddock/core/defaults.py index a2b504d4b5..a645053e7d 100644 --- a/src/haddock/core/defaults.py +++ b/src/haddock/core/defaults.py @@ -34,6 +34,9 @@ INTERACTIVE_RE_SUFFIX = "interactive" """Suffix added to interactive haddock3-re runs.""" +ANA_FOLDER = "analysis" 
+"""Name of the analysis folder.""" + MODULE_DEFAULT_YAML = "defaults.yaml" """Default name of the yaml default parameters file.""" diff --git a/src/haddock/gear/prepare_run.py b/src/haddock/gear/prepare_run.py index 8f1ee274b0..16c9d5407f 100644 --- a/src/haddock/gear/prepare_run.py +++ b/src/haddock/gear/prepare_run.py @@ -6,16 +6,14 @@ import json import os import shutil -import string import sys -import tarfile from contextlib import contextmanager, suppress from copy import copy, deepcopy from functools import lru_cache, wraps from pathlib import Path, PosixPath from haddock import EmptyPath, contact_us, haddock3_source_path, log -from haddock.core.defaults import RUNDIR, max_molecules_allowed +from haddock.core.defaults import ANA_FOLDER, RUNDIR, max_molecules_allowed from haddock.core.exceptions import ConfigurationError, ModuleError from haddock.core.typing import ( Any, @@ -333,6 +331,8 @@ def setup_run( remove_folders_after_number(general_params[RUNDIR], restart_from) _data_dir = Path(general_params[RUNDIR], "data") remove_folders_after_number(_data_dir, restart_from) + _analysis_dir = Path(general_params[RUNDIR], ANA_FOLDER) + remove_folders_after_number(_analysis_dir, restart_from) if restarting_from or starting_from_copy: # get run files in folder @@ -397,7 +397,7 @@ def setup_run( enhanced_haddock_params = deepcopy(general_params) enhanced_haddock_params.update(modules_params) config_files["enhanced_haddock_params"] = enhanced_haddock_params - config_saves = save_configuration_files(config_files, data_dir) # noqa : F841 + _config_saves = save_configuration_files(config_files, data_dir) # noqa : F841 if scratch_rest0: copy_molecules_to_data_dir( diff --git a/src/haddock/gear/restart_run.py b/src/haddock/gear/restart_run.py index a973bc9601..c8e114493a 100644 --- a/src/haddock/gear/restart_run.py +++ b/src/haddock/gear/restart_run.py @@ -62,6 +62,6 @@ def remove_folders_after_number(run_dir: Path, num: int) -> None: if int(folder.split('_')[0]) >= num ] 
# Loop over folders to remove - for torm_folder in from_num_folders: - remove_folder(Path(run_dir, torm_folder)) + for toremove_folder in from_num_folders: + remove_folder(Path(run_dir, toremove_folder)) return From d20c31b785e2eb2ffb8084f61d36080c0548e407 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Fri, 10 Apr 2026 13:42:56 +0200 Subject: [PATCH 2/9] changelog update --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42510301a6..fe743c7446 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ # Changelog +- 2026-04-10: Removing analysis modules downstream when restarting - Issue #1495 - 2026-04-08: Improved docstring in modules - 2026-04-08: Increased NOE restraints array size in scoring modules - Issue #1501 - 2026-03-09: Automated type casting for optional argument seed in haddock3-restraints random_removal - Issue #1485 From 984e406d5c65a69b1262598b39b9ae70d7783a18 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Sat, 11 Apr 2026 11:16:22 +0200 Subject: [PATCH 3/9] tests --- tests/test_gear_restart.py | 45 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/tests/test_gear_restart.py b/tests/test_gear_restart.py index 3f76810e4d..5f3a00dbb6 100644 --- a/tests/test_gear_restart.py +++ b/tests/test_gear_restart.py @@ -1,9 +1,15 @@ """Test gear.restart_run.""" import argparse - +import os import pytest +import tempfile -from haddock.gear import restart_run +from haddock.gear import ( + restart_run, + preprocess_restart_from, + ANA_DIR, + TRACEBACK_FOLDER, +) def test_has_help(): @@ -65,3 +71,38 @@ def test_restart_cli_error(n): ap.parse_args(f'--restart {n}'.split()) assert exit.type == SystemExit assert exit.value.code == 2 + + +def test_preprocess_restart_from(): + """Test removal of downstream directories.""" + with tempfile.TemporaryDirectory() as tmpdir: + # Build mimic of previous run directories + module_dirs = ["topoaa", "rigidbody", "caprieval"] + for 
module_index, module_name in module_dirs: + os.makedirs(f"{tmpdir}/data/{module_index}_{module_name}") + os.makedirs(f"{tmpdir}/{module_index}_{module_name}") + if module_name == "caprieval": + os.makedirs( + f"{tmpdir}/{ANA_DIR}/{module_index}_{module_name}_analysis" + ) + os.makedirs(f"{tmpdir}/{TRACEBACK_FOLDER}") + + # Test restart preprocessing function + preprocess_restart_from(tmpdir, 2) + + # Verify they were removed + expected_to_be_removed = [ + f"{tmpdir}/{TRACEBACK_FOLDER}", + f"{tmpdir}/{ANA_DIR}/2_caprieval_analysis", + f"{tmpdir}/2_caprieval", + ] + expected_to_stay_there = [ + f"{tmpdir}/1_rigidbody", + f"{tmpdir}/data/1_rigidbody", + f"{tmpdir}/0_topoaa", + f"{tmpdir}/data/0_topoaa", + ] + for should_not_exist in expected_to_be_removed: + assert not os.path.exists(should_not_exist) + for should_exist in expected_to_stay_there: + assert os.path.exists(should_exist) From 0fed00b8681bdafbf4ed22d45d6bb151fab434ef Mon Sep 17 00:00:00 2001 From: VGPReys Date: Sat, 11 Apr 2026 11:16:57 +0200 Subject: [PATCH 4/9] adding traceback + refactoring --- src/haddock/clis/cli_traceback.py | 12 +++++------- src/haddock/core/defaults.py | 3 +++ src/haddock/gear/prepare_run.py | 10 +++------- src/haddock/gear/restart_run.py | 27 +++++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 14 deletions(-) diff --git a/src/haddock/clis/cli_traceback.py b/src/haddock/clis/cli_traceback.py index 584f447562..118b717808 100644 --- a/src/haddock/clis/cli_traceback.py +++ b/src/haddock/clis/cli_traceback.py @@ -17,6 +17,7 @@ import pandas as pd from haddock import log +from haddock.core.defaults import TRACEBACK_FOLDER from haddock.core.typing import Any, FilePath from haddock.libs import libcli from haddock.libs.libontology import ModuleIO, PDBFile @@ -24,9 +25,6 @@ from haddock.modules import get_module_steps_folders -TRACK_FOLDER = "traceback" # name of the traceback folder - - def get_steps_without_pdbs(run_dir, all_steps): """ Get the modules that do not produce PDB 
files. @@ -270,7 +268,7 @@ def main(run_dir: FilePath, offline: bool = False) -> None: log.info(f"Steps to trace back: {', '.join(sel_step)}") # creating traceback folder - outdir = Path(run_dir, TRACK_FOLDER) + outdir = Path(run_dir, TRACEBACK_FOLDER) try: outdir.mkdir(exist_ok=False) log.info(f"Created directory: {str(outdir.resolve())}") @@ -350,18 +348,18 @@ def main(run_dir: FilePath, offline: bool = False) -> None: # ordering the dataframe df_output = order_traceback_df(df_output, sel_step) # dumping the dataframe - track_filename = Path(run_dir, TRACK_FOLDER, "traceback.tsv") + track_filename = Path(run_dir, TRACEBACK_FOLDER, "traceback.tsv") log.info( f"Output dataframe {track_filename} " f"created with shape {df_output.shape}" ) df_output.to_csv(track_filename, sep="\t", index=False) # taking (and writing) a subset of the dataframe - consensus_filename = Path(run_dir, TRACK_FOLDER, "consensus.tsv") + consensus_filename = Path(run_dir, TRACEBACK_FOLDER, "consensus.tsv") rank_data_subset = subset_traceback(df_output, consensus_filename) # plotting the traceback dataframe - plot_filename = Path(run_dir, TRACK_FOLDER, "traceback.html") + plot_filename = Path(run_dir, TRACEBACK_FOLDER, "traceback.html") make_traceback_plot(rank_data_subset, plot_filename, offline=offline) return diff --git a/src/haddock/core/defaults.py b/src/haddock/core/defaults.py index a645053e7d..174f2feda4 100644 --- a/src/haddock/core/defaults.py +++ b/src/haddock/core/defaults.py @@ -37,6 +37,9 @@ ANA_FOLDER = "analysis" """Name of the analysis folder.""" +TRACEBACK_FOLDER = "traceback" +"""Name of the traceback folder.""" + MODULE_DEFAULT_YAML = "defaults.yaml" """Default name of the yaml default parameters file.""" diff --git a/src/haddock/gear/prepare_run.py b/src/haddock/gear/prepare_run.py index 16c9d5407f..d07c5ac099 100644 --- a/src/haddock/gear/prepare_run.py +++ b/src/haddock/gear/prepare_run.py @@ -13,7 +13,7 @@ from pathlib import Path, PosixPath from haddock import 
EmptyPath, contact_us, haddock3_source_path, log -from haddock.core.defaults import ANA_FOLDER, RUNDIR, max_molecules_allowed +from haddock.core.defaults import RUNDIR, max_molecules_allowed from haddock.core.exceptions import ConfigurationError, ModuleError from haddock.core.typing import ( Any, @@ -57,7 +57,7 @@ config_optional_general_parameters_dict, ) from haddock.gear.preprocessing import process_pdbs, read_additional_residues -from haddock.gear.restart_run import remove_folders_after_number +from haddock.gear.restart_run import preprocess_restart_from from haddock.gear.validations import ( v_rundir, validate_defaults_yaml, @@ -328,11 +328,7 @@ def setup_run( check_mandatory_argments_are_present(general_params) if restarting_from: - remove_folders_after_number(general_params[RUNDIR], restart_from) - _data_dir = Path(general_params[RUNDIR], "data") - remove_folders_after_number(_data_dir, restart_from) - _analysis_dir = Path(general_params[RUNDIR], ANA_FOLDER) - remove_folders_after_number(_analysis_dir, restart_from) + preprocess_restart_from(general_params[RUNDIR], restart_from) if restarting_from or starting_from_copy: # get run files in folder diff --git a/src/haddock/gear/restart_run.py b/src/haddock/gear/restart_run.py index c8e114493a..0f1a54971c 100644 --- a/src/haddock/gear/restart_run.py +++ b/src/haddock/gear/restart_run.py @@ -2,7 +2,9 @@ from argparse import ArgumentParser, ArgumentTypeError from functools import partial from pathlib import Path +from shutil import rmtree +from haddock.core.defaults import ANA_FOLDER, TRACEBACK_FOLDER from haddock.libs.libutil import non_negative_int, remove_folder from haddock.modules import get_module_steps_folders @@ -65,3 +67,28 @@ def remove_folders_after_number(run_dir: Path, num: int) -> None: for toremove_folder in from_num_folders: remove_folder(Path(run_dir, toremove_folder)) return + + +def preprocess_restart_from(rundir: str, restart_from: int) -> None: + """Remove all folders and files that are 
downstream of restart index. + + Also remove analyses directories and traceback. + + Parameters + ---------- + rundir : str + Workflow run directory + restart_from : int + Index of module to restart from + """ + # Remove modules data after index + remove_folders_after_number(rundir, restart_from) + # Remove data for corresponding modules + _data_dir = Path(rundir, "data") + remove_folders_after_number(_data_dir, restart_from) + # Remove analysis folders after index + _analysis_dir = Path(rundir, ANA_FOLDER) + remove_folders_after_number(_analysis_dir, restart_from) + # Remove traceback directory + _traceback_dir = Path(rundir, TRACEBACK_FOLDER) + rmtree(_traceback_dir, ignore_errors=True) From dd947fcb50b561b63518b70141ad3c344f3d42cf Mon Sep 17 00:00:00 2001 From: VGPReys Date: Sat, 11 Apr 2026 11:38:45 +0200 Subject: [PATCH 5/9] fix test imports --- tests/test_gear_restart.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/test_gear_restart.py b/tests/test_gear_restart.py index 5f3a00dbb6..2e4cb43ac3 100644 --- a/tests/test_gear_restart.py +++ b/tests/test_gear_restart.py @@ -4,12 +4,8 @@ import pytest import tempfile -from haddock.gear import ( - restart_run, - preprocess_restart_from, - ANA_DIR, - TRACEBACK_FOLDER, -) +from haddock.gear.restart_run import restart_run, preprocess_restart_from +from haddock.core.defaults import ANA_DIR, TRACEBACK_FOLDER def test_has_help(): From 45237a2d151f483578275482a29f0777a5a047bf Mon Sep 17 00:00:00 2001 From: VGPReys Date: Sat, 11 Apr 2026 11:43:14 +0200 Subject: [PATCH 6/9] fix test imports --- tests/test_gear_restart.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_gear_restart.py b/tests/test_gear_restart.py index 2e4cb43ac3..ec39755b63 100644 --- a/tests/test_gear_restart.py +++ b/tests/test_gear_restart.py @@ -4,7 +4,7 @@ import pytest import tempfile -from haddock.gear import 
restart_run from haddock.core.defaults import ANA_DIR, TRACEBACK_FOLDER @@ -84,7 +84,7 @@ def test_preprocess_restart_from(): os.makedirs(f"{tmpdir}/{TRACEBACK_FOLDER}") # Test restart preprocessing function - preprocess_restart_from(tmpdir, 2) + restart_run.preprocess_restart_from(tmpdir, 2) # Verify they were removed expected_to_be_removed = [ From 0ba42f1cf70d820a100af4e7ae01f7d58f084213 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Sat, 11 Apr 2026 11:53:01 +0200 Subject: [PATCH 7/9] fix test imports --- tests/test_gear_restart.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_gear_restart.py b/tests/test_gear_restart.py index ec39755b63..aa7b5e7101 100644 --- a/tests/test_gear_restart.py +++ b/tests/test_gear_restart.py @@ -5,7 +5,7 @@ import tempfile from haddock.gear import restart_run -from haddock.core.defaults import ANA_DIR, TRACEBACK_FOLDER +from haddock.core.defaults import ANA_FOLDER, TRACEBACK_FOLDER def test_has_help(): @@ -79,7 +79,7 @@ def test_preprocess_restart_from(): os.makedirs(f"{tmpdir}/{module_index}_{module_name}") if module_name == "caprieval": os.makedirs( - f"{tmpdir}/{ANA_DIR}/{module_index}_{module_name}_analysis" + f"{tmpdir}/{ANA_FOLDER}/{module_index}_{module_name}_analysis" ) os.makedirs(f"{tmpdir}/{TRACEBACK_FOLDER}") @@ -89,7 +89,7 @@ def test_preprocess_restart_from(): # Verify they were removed expected_to_be_removed = [ f"{tmpdir}/{TRACEBACK_FOLDER}", - f"{tmpdir}/{ANA_DIR}/2_caprieval_analysis", + f"{tmpdir}/{ANA_FOLDER}/2_caprieval_analysis", f"{tmpdir}/2_caprieval", ] expected_to_stay_there = [ From 3c3f16564eded71ea90d5b42713b388114c466f0 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Sat, 11 Apr 2026 12:00:41 +0200 Subject: [PATCH 8/9] fix enum --- tests/test_gear_restart.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_gear_restart.py b/tests/test_gear_restart.py index aa7b5e7101..ef04a61d5e 100644 --- a/tests/test_gear_restart.py +++ 
b/tests/test_gear_restart.py @@ -74,7 +74,7 @@ def test_preprocess_restart_from(): with tempfile.TemporaryDirectory() as tmpdir: # Build mimic of previous run directories module_dirs = ["topoaa", "rigidbody", "caprieval"] - for module_index, module_name in module_dirs: + for module_index, module_name in enumerate(module_dirs): os.makedirs(f"{tmpdir}/data/{module_index}_{module_name}") os.makedirs(f"{tmpdir}/{module_index}_{module_name}") if module_name == "caprieval": From b46c5e616c7abc477cf91ecba238ccd72ac07959 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Sat, 11 Apr 2026 13:12:25 +0200 Subject: [PATCH 9/9] import Path --- tests/test_gear_restart.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_gear_restart.py b/tests/test_gear_restart.py index ef04a61d5e..4f802262d0 100644 --- a/tests/test_gear_restart.py +++ b/tests/test_gear_restart.py @@ -4,6 +4,8 @@ import pytest import tempfile +from pathlib import Path + from haddock.gear import restart_run from haddock.core.defaults import ANA_FOLDER, TRACEBACK_FOLDER @@ -84,7 +86,7 @@ def test_preprocess_restart_from(): os.makedirs(f"{tmpdir}/{TRACEBACK_FOLDER}") # Test restart preprocessing function - restart_run.preprocess_restart_from(tmpdir, 2) + restart_run.preprocess_restart_from(Path(tmpdir), 2) # Verify they were removed expected_to_be_removed = [