Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Changelog

- 2026-04-10: Removing downstream analysis and traceback folders when restarting a run - Issue #1495
- 2026-04-08: Improved docstring in modules
- 2026-04-08: Increased NOE restraints array size in scoring modules - Issue #1501
- 2026-03-09: Automated type casting for optional argument seed in haddock3-restraints random_removal - Issue #1485
Expand Down
13 changes: 9 additions & 4 deletions src/haddock/clis/cli_analyse.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from haddock import log
from haddock.clis.cli_unpack import main as haddock3_unpack
from haddock.clis.cli_clean import main as haddock3_clean
from haddock.core.defaults import INTERACTIVE_RE_SUFFIX
from haddock.core.defaults import INTERACTIVE_RE_SUFFIX, ANA_FOLDER
from haddock.core.typing import (
Any,
ArgumentParser,
Expand Down Expand Up @@ -63,7 +63,6 @@
from haddock.modules.analysis.caprieval import HaddockModule


ANA_FOLDER = "analysis" # name of the analysis folder
INTER_STR = INTERACTIVE_RE_SUFFIX # suffix of interactive analysis folders


Expand Down Expand Up @@ -758,9 +757,15 @@ def main(
# get the module folders from the run_dir input
sel_steps = get_module_steps_folders(Path("./"), modules)
if inter:
sel_steps = [st for st in sel_steps if st.endswith(INTER_STR)]
sel_steps = [
st for st in sel_steps
if st.endswith(INTERACTIVE_RE_SUFFIX)
]
else:
sel_steps = [st for st in sel_steps if not st.endswith(INTER_STR)]
sel_steps = [
st for st in sel_steps
if not st.endswith(INTERACTIVE_RE_SUFFIX)
]
log.info(f"selected steps: {', '.join(sel_steps)}")

# analysis
Expand Down
12 changes: 5 additions & 7 deletions src/haddock/clis/cli_traceback.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,14 @@
import pandas as pd

from haddock import log
from haddock.core.defaults import TRACEBACK_FOLDER
from haddock.core.typing import Any, FilePath
from haddock.libs import libcli
from haddock.libs.libontology import ModuleIO, PDBFile
from haddock.libs.libplots import make_traceback_plot
from haddock.modules import get_module_steps_folders


TRACK_FOLDER = "traceback" # name of the traceback folder


def get_steps_without_pdbs(run_dir, all_steps):
"""
Get the modules that do not produce PDB files.
Expand Down Expand Up @@ -270,7 +268,7 @@ def main(run_dir: FilePath, offline: bool = False) -> None:
log.info(f"Steps to trace back: {', '.join(sel_step)}")

# creating traceback folder
outdir = Path(run_dir, TRACK_FOLDER)
outdir = Path(run_dir, TRACEBACK_FOLDER)
try:
outdir.mkdir(exist_ok=False)
log.info(f"Created directory: {str(outdir.resolve())}")
Expand Down Expand Up @@ -350,18 +348,18 @@ def main(run_dir: FilePath, offline: bool = False) -> None:
# ordering the dataframe
df_output = order_traceback_df(df_output, sel_step)
# dumping the dataframe
track_filename = Path(run_dir, TRACK_FOLDER, "traceback.tsv")
track_filename = Path(run_dir, TRACEBACK_FOLDER, "traceback.tsv")
log.info(
f"Output dataframe {track_filename} " f"created with shape {df_output.shape}"
)
df_output.to_csv(track_filename, sep="\t", index=False)

# taking (and writing) a subset of the dataframe
consensus_filename = Path(run_dir, TRACK_FOLDER, "consensus.tsv")
consensus_filename = Path(run_dir, TRACEBACK_FOLDER, "consensus.tsv")
rank_data_subset = subset_traceback(df_output, consensus_filename)

# plotting the traceback dataframe
plot_filename = Path(run_dir, TRACK_FOLDER, "traceback.html")
plot_filename = Path(run_dir, TRACEBACK_FOLDER, "traceback.html")
make_traceback_plot(rank_data_subset, plot_filename, offline=offline)
return

Expand Down
6 changes: 6 additions & 0 deletions src/haddock/core/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@
INTERACTIVE_RE_SUFFIX = "interactive"
"""Suffix added to interactive haddock3-re runs."""

ANA_FOLDER = "analysis"
"""Name of the analysis folder."""

TRACEBACK_FOLDER = "traceback"
"""Name of the traceback folder."""

MODULE_DEFAULT_YAML = "defaults.yaml"
"""Default name of the yaml default parameters file."""

Expand Down
10 changes: 3 additions & 7 deletions src/haddock/gear/prepare_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
import json
import os
import shutil
import string
import sys
import tarfile
from contextlib import contextmanager, suppress
from copy import copy, deepcopy
from functools import lru_cache, wraps
Expand Down Expand Up @@ -59,7 +57,7 @@
config_optional_general_parameters_dict,
)
from haddock.gear.preprocessing import process_pdbs, read_additional_residues
from haddock.gear.restart_run import remove_folders_after_number
from haddock.gear.restart_run import preprocess_restart_from
from haddock.gear.validations import (
v_rundir,
validate_defaults_yaml,
Expand Down Expand Up @@ -330,9 +328,7 @@ def setup_run(
check_mandatory_argments_are_present(general_params)

if restarting_from:
remove_folders_after_number(general_params[RUNDIR], restart_from)
_data_dir = Path(general_params[RUNDIR], "data")
remove_folders_after_number(_data_dir, restart_from)
preprocess_restart_from(general_params[RUNDIR], restart_from)

if restarting_from or starting_from_copy:
# get run files in folder
Expand Down Expand Up @@ -397,7 +393,7 @@ def setup_run(
enhanced_haddock_params = deepcopy(general_params)
enhanced_haddock_params.update(modules_params)
config_files["enhanced_haddock_params"] = enhanced_haddock_params
config_saves = save_configuration_files(config_files, data_dir) # noqa : F841
_config_saves = save_configuration_files(config_files, data_dir) # noqa : F841

if scratch_rest0:
copy_molecules_to_data_dir(
Expand Down
31 changes: 29 additions & 2 deletions src/haddock/gear/restart_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
from argparse import ArgumentParser, ArgumentTypeError
from functools import partial
from pathlib import Path
from shutil import rmtree

from haddock.core.defaults import ANA_FOLDER, TRACEBACK_FOLDER
from haddock.libs.libutil import non_negative_int, remove_folder
from haddock.modules import get_module_steps_folders

Expand Down Expand Up @@ -62,6 +64,31 @@ def remove_folders_after_number(run_dir: Path, num: int) -> None:
if int(folder.split('_')[0]) >= num
]
# Loop over folders to remove
for torm_folder in from_num_folders:
remove_folder(Path(run_dir, torm_folder))
for toremove_folder in from_num_folders:
remove_folder(Path(run_dir, toremove_folder))
return


def preprocess_restart_from(rundir: str, restart_from: int) -> None:
    """Remove all folders and files that are downstream of restart index.

    Also remove the downstream analysis directories and the traceback
    directory.

    Parameters
    ----------
    rundir : str
        Workflow run directory
    restart_from : int
        Index of module to restart from
    """
    # Normalise once so both `str` and `Path` inputs are handled alike
    run_path = Path(rundir)
    # Remove modules data after index
    remove_folders_after_number(run_path, restart_from)
    # Remove data for corresponding modules
    remove_folders_after_number(Path(run_path, "data"), restart_from)
    # Remove analysis folders after index.
    # The analysis directory may not exist (e.g. no analysis was run);
    # there is nothing to remove in that case.
    analysis_dir = Path(run_path, ANA_FOLDER)
    if analysis_dir.is_dir():
        remove_folders_after_number(analysis_dir, restart_from)
    # Remove traceback directory (silently skipped when absent)
    rmtree(Path(run_path, TRACEBACK_FOLDER), ignore_errors=True)
41 changes: 40 additions & 1 deletion tests/test_gear_restart.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
"""Test gear.restart_run."""
import argparse

import os
import pytest
import tempfile

from pathlib import Path

from haddock.gear import restart_run
from haddock.core.defaults import ANA_FOLDER, TRACEBACK_FOLDER


def test_has_help():
Expand Down Expand Up @@ -65,3 +69,38 @@ def test_restart_cli_error(n):
ap.parse_args(f'--restart {n}'.split())
assert exit.type == SystemExit
assert exit.value.code == 2


def test_preprocess_restart_from():
    """Test removal of downstream directories.

    Builds a fake run directory holding three module folders, their
    `data` counterparts, one analysis folder and the traceback folder,
    then checks that restarting from index 2 removes everything related
    to module 2 (and the traceback) while leaving modules 0 and 1 intact.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        # Build mimic of previous run directories
        module_dirs = ["topoaa", "rigidbody", "caprieval"]
        for module_index, module_name in enumerate(module_dirs):
            os.makedirs(f"{tmpdir}/data/{module_index}_{module_name}")
            os.makedirs(f"{tmpdir}/{module_index}_{module_name}")
            if module_name == "caprieval":
                os.makedirs(
                    f"{tmpdir}/{ANA_FOLDER}/{module_index}_{module_name}_analysis"
                )
        os.makedirs(f"{tmpdir}/{TRACEBACK_FOLDER}")

        # Test restart preprocessing function
        restart_run.preprocess_restart_from(Path(tmpdir), 2)

        # Verify they were removed
        expected_to_be_removed = [
            f"{tmpdir}/{TRACEBACK_FOLDER}",
            f"{tmpdir}/{ANA_FOLDER}/2_caprieval_analysis",
            f"{tmpdir}/2_caprieval",
            # The `data` counterpart of the restarted module must go too
            f"{tmpdir}/data/2_caprieval",
        ]
        expected_to_stay_there = [
            f"{tmpdir}/1_rigidbody",
            f"{tmpdir}/data/1_rigidbody",
            f"{tmpdir}/0_topoaa",
            f"{tmpdir}/data/0_topoaa",
        ]
        for should_not_exist in expected_to_be_removed:
            assert not os.path.exists(should_not_exist)
        for should_exist in expected_to_stay_there:
            assert os.path.exists(should_exist)
Loading