Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 51 additions & 14 deletions integration_tests/test_contactmap.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
"""Integration-test of the CONTact MAP module."""

import os
import pytest
import pytest_mock # noqa : F401
import tempfile
from pathlib import Path
import glob


from haddock.libs.libontology import PDBFile
from haddock.modules.analysis.contactmap import HaddockModule as CMapModule
Expand All @@ -20,11 +22,11 @@ def contactmap():
order=0,
path=Path(tmpdir),
initial_params=CONTMAP_CONF,
)
)
yield preset_contactmap


class MockPreviousIO():
class MockPreviousIO:
"""A mocking class holding the retrieve_models method."""

def __init__(self, path):
Expand All @@ -33,13 +35,17 @@ def __init__(self, path):
def retrieve_models(individualize: bool = False):
"""Provide a set of models."""
models = [
PDBFile(Path(GOLDEN_DATA, "contactmap_rigidbody_3_cltid_None.pdb"),
path=GOLDEN_DATA),
PDBFile(Path(GOLDEN_DATA, "contactmap_rigidbody_5_clt_1.pdb"),
path=GOLDEN_DATA),
PDBFile(Path(GOLDEN_DATA, "contactmap_rigidbody_7_clt_1.pdb"),
path=GOLDEN_DATA),
]
PDBFile(
Path(GOLDEN_DATA, "contactmap_rigidbody_3_cltid_None.pdb"),
path=GOLDEN_DATA,
),
PDBFile(
Path(GOLDEN_DATA, "contactmap_rigidbody_5_clt_1.pdb"), path=GOLDEN_DATA
),
PDBFile(
Path(GOLDEN_DATA, "contactmap_rigidbody_7_clt_1.pdb"), path=GOLDEN_DATA
),
]
# set models cluster ids
models[0].clt_id = None
models[1].clt_id = 1
Expand All @@ -49,22 +55,27 @@ def retrieve_models(individualize: bool = False):
return models


def test_contactmap_example(contactmap, mocker):
def test_contactmap_example(contactmap, monkeypatch, mocker):
"""Test the contact map module run."""
# mock the previous_io behavior
contactmap.previous_io = MockPreviousIO
# mock the export_io_models function
mocker.patch(
"haddock.modules.BaseHaddockModule.export_io_models",
return_value=None,
)
)
monkeypatch.chdir(contactmap.path)
# Run the module
contactmap.run()
# check outputs
output_bp = contactmap.path
# clt_id == None
clustNone_tsv_fpath = f"{output_bp}/Unclustered_contactmap_rigidbody_3_cltid_None_contacts.tsv" # noqa : E501
clustNone_html_fpath = f"{output_bp}/Unclustered_contactmap_rigidbody_3_cltid_None_heatmap.html" # noqa : E501
clustNone_tsv_fpath = (
f"{output_bp}/Unclustered_contactmap_rigidbody_3_cltid_None_contacts.tsv"
)
clustNone_html_fpath = (
f"{output_bp}/Unclustered_contactmap_rigidbody_3_cltid_None_heatmap.html"
)
assert os.path.exists(clustNone_tsv_fpath)
assert Path(clustNone_tsv_fpath).stat().st_size != 0
assert os.path.exists(clustNone_html_fpath)
Expand All @@ -81,3 +92,29 @@ def test_contactmap_example(contactmap, mocker):
assert Path(clust1_html_fpath).stat().st_size != 0
Path(clust1_tsv_fpath).unlink(missing_ok=False)
Path(clust1_html_fpath).unlink(missing_ok=False)


def test_contactmap_low_memory(contactmap, monkeypatch, mocker):
    """Check that the contact map module skips its run when memory is too low."""
    # Models are served by the mocked previous-IO class
    contactmap.previous_io = MockPreviousIO

    # Patch target -> value it must return. Exporting models becomes a no-op,
    # and the memory helpers report 0.0 GB available against 1.0 GB needed.
    patched_returns = {
        "haddock.modules.BaseHaddockModule.export_io_models": None,
        "haddock.modules.analysis.contactmap.get_available_memory": 0.0,
        "haddock.modules.analysis.contactmap.get_necessary_memory": 1.0,
    }
    for target, value in patched_returns.items():
        mocker.patch(target, return_value=value)

    monkeypatch.chdir(contactmap.path)

    # With less memory than required, the run is expected to be skipped
    contactmap.run()

    # Nothing should have been written in the module directory
    leftovers = glob.glob(f"{contactmap.path}/*")
    assert not leftovers
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ dependencies = [
'prodigy-lig==1.1.4',
'plotly==6.5.2',
'freesasa>=2.2.1',
'psutil>=7.2.2',
]

[project.optional-dependencies]
Expand Down
52 changes: 52 additions & 0 deletions src/haddock/libs/libutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
from importlib.resources import files
from pathlib import Path

import psutil

import haddock
from haddock import EmptyPath, log
from haddock.core.exceptions import SetupError
Expand Down Expand Up @@ -524,3 +526,53 @@ def get_prodrg_exec() -> tuple[Optional[Path], Optional[Path]]:
return None, None

return prodrg_exec, prodrg_param


def get_available_memory() -> float:
    """
    Report how much system memory is currently available, in GB.

    The value is read from ``psutil.virtual_memory().available`` (in bytes)
    and divided by ``1024 ** 3`` to express it in gigabytes.

    Returns
    -------
    float
        Available memory in gigabytes (GB).
    """
    bytes_per_gb = 1024**3
    available_bytes = psutil.virtual_memory().available
    return available_bytes / bytes_per_gb


def get_necessary_memory(models: list) -> float:
    """
    Estimate the memory required to compute contact maps.

    The estimate is based on the largest model file: its size on disk is
    converted into an approximate atom count (one atom per 10 bytes), and the
    memory is that of a distance matrix of (atoms x atoms) entries of 8 bytes.

    Models whose file size cannot be read are ignored. If no usable size is
    found (empty list, unreadable files, files smaller than 10 bytes), a
    default of 10000 atoms is used.

    Parameters
    ----------
    models : list
        List of model objects with file_name attribute.

    Returns
    -------
    float
        Estimated memory requirement in gigabytes (GB).
    """
    default_atoms = 10000
    bytes_per_atom = 10  # rough file-size to atom-count conversion
    bytes_per_entry = 8  # one distance matrix entry

    # Only the file sizes are inspected: stat-ing a file is much cheaper than
    # parsing it, and the largest file drives the peak memory requirement.
    # NOTE(review): `file_name` is handed to os.path.getsize as-is; if it only
    # holds a base name, the lookup is relative to the current working
    # directory and may silently end in the default estimate - confirm.
    largest_size = 0
    for model in models or []:
        try:
            size = os.path.getsize(model.file_name)
        except (AttributeError, OSError, TypeError, ValueError):
            # Missing attribute, unreadable or invalid path: skip this model
            continue
        largest_size = max(largest_size, size)

    # Fall back to the default when nothing usable was measured
    estimated_atoms = largest_size // bytes_per_atom or default_atoms

    matrix_size_bytes = estimated_atoms * estimated_atoms * bytes_per_entry
    # Convert it into GigaBytes
    return matrix_size_bytes / (1024**3)
52 changes: 42 additions & 10 deletions src/haddock/modules/analysis/contactmap/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@
get_clusters_sets,
make_contactmap_report,
topX_models,
)
)
from haddock.libs.libutil import (
get_available_memory,
get_necessary_memory,
)


RECIPE_PATH = Path(__file__).resolve().parent
Expand All @@ -40,13 +44,13 @@ class HaddockModule(BaseHaddockModule):
name = RECIPE_PATH.name

def __init__(
self,
order: int,
path: Path,
*ignore: Any,
init_params: FilePath = DEFAULT_CONFIG,
**everything: Any,
) -> None:
self,
order: int,
path: Path,
*ignore: Any,
init_params: FilePath = DEFAULT_CONFIG,
**everything: Any,
) -> None:
"""Initialize class."""
super().__init__(order, path, init_params)

Expand All @@ -61,11 +65,39 @@ def _run(self) -> None:
if type(self.previous_io) == iter:
_e = "This module cannot come after one that produced an iterable."
self.finish_with_error(_e)
models = []
try:
models = self.previous_io.retrieve_models(individualize=True)
except AttributeError as e:
self.finish_with_error(e)

# === IMPORTANT ================================================================
# This module uses an NxN distance matrix, which means that the memory
# requirement increases quadratically and the run can fail with an
# out-of-memory error. Changing this behaviour would require a total re-write
# of the module as of 04-2026, so instead we apply the following workaround:
# - Check the size of the model files on disk (faster than reading them)
# - Guesstimate how many atoms a model would have based on that size
# - Calculate the expected matrix size and its memory requirements
# - Get how much memory the current host system has available
# - If the system has less memory than needed, skip the module gracefully
current_memory = get_available_memory()
needed_memory = get_necessary_memory(models) * self.params["ncores"]
if current_memory < needed_memory:
self.log(
msg=(
f"Not enough memory to execute `contactmap` "
f"(needs {needed_memory:.2f}Gb has {current_memory:.2f}Gb). "
"! Skipping this module !"
),
level="warning",
)
self.output_models = models
self.export_io_models()
return

# ==============================================================================

# Obtain clusters
clusters_sets = get_clusters_sets(models)

Expand All @@ -92,7 +124,7 @@ def _run(self) -> None:
Path(model.rel_path),
Path(jobname),
single_models_params,
)
)
contact_jobs.append(contmap_job)

# For clustered models
Expand All @@ -106,7 +138,7 @@ def _run(self) -> None:
[Path(model.rel_path) for model in clt_models],
Path(name),
self.params,
)
)
contact_jobs.append(contmap_job)

# Find execution engine
Expand Down
71 changes: 68 additions & 3 deletions tests/test_module_contmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@

from haddock.libs.libontology import PDBFile
from haddock.modules.analysis.contactmap import DEFAULT_CONFIG
from haddock.modules.analysis.contactmap import \
HaddockModule as ContactMapModule
from haddock.modules.analysis.contactmap import HaddockModule as ContactMapModule
from haddock.modules.analysis.contactmap.contmap import (
PI,
ClusteredContactMap,
Expand All @@ -35,7 +34,11 @@
topX_models,
within_2PI,
write_res_contacts,
)
)
from haddock.libs.libutil import (
get_available_memory,
get_necessary_memory,
)

from . import golden_data

Expand Down Expand Up @@ -625,3 +628,65 @@ def test_make_ideogram_arc_moduloAB():
assert arc_positions.shape == excpected_output.shape
for i in range(nb_points):
assert np.isclose(arc_positions[i], excpected_output[i], atol=0.0001)


def test_get_available_memory():
    """Check that the available memory is reported as a positive float."""
    available_gb = get_available_memory()
    # The host system memory must come back as a strictly positive float
    assert isinstance(available_gb, float)
    assert available_gb > 0


def test_get_necessary_memory():
    """Check the memory estimate obtained from a list of valid models."""
    pdb_names = ("protprot_complex_1.pdb", "protprot_complex_2.pdb")
    models = [
        PDBFile(Path(golden_data, pdb_name), path=golden_data)
        for pdb_name in pdb_names
    ]
    estimate_gb = get_necessary_memory(models)
    # The estimate must be a strictly positive float
    assert isinstance(estimate_gb, float)
    assert estimate_gb > 0


def test_get_necessary_memory_empty_list():
    """Test get_necessary_memory falls back to its default with no models."""
    models = []
    memory = get_necessary_memory(models)
    # Default estimate: 10000 atoms -> (10000 x 10000) matrix of 8-byte entries
    expected_default = (10000 * 10000 * 8) / (1024**3)
    assert isinstance(memory, float)
    assert memory == expected_default


def test_get_necessary_memory_invalid_files():
    """Test get_necessary_memory falls back to its default on unreadable files."""
    from unittest.mock import patch

    models = [
        PDBFile(Path(golden_data, "protprot_complex_1.pdb"), path=golden_data),
    ]
    # Make every file size lookup fail while the estimate is computed
    with patch(
        "os.path.getsize",
        side_effect=FileNotFoundError("File not found"),
    ):
        memory = get_necessary_memory(models)

    # Default estimate: 10000 atoms -> (10000 x 10000) matrix of 8-byte entries
    expected_default = (10000 * 10000 * 8) / (1024**3)
    assert isinstance(memory, float)
    assert memory == expected_default


def test_get_necessary_memory_zero_bytes():
    """Test get_necessary_memory falls back to its default on zero-byte files."""
    with tempfile.TemporaryDirectory() as tempdir:
        # Create a zero-byte file
        zero_file = Path(tempdir, "empty.pdb")
        zero_file.write_text("")

        models = [
            PDBFile(zero_file, path=str(zero_file.parent)),
        ]
        memory = get_necessary_memory(models)

    # Default estimate: 10000 atoms -> (10000 x 10000) matrix of 8-byte entries
    expected_default = (10000 * 10000 * 8) / (1024**3)
    assert isinstance(memory, float)
    assert memory == expected_default
Loading