diff --git a/integration_tests/test_contactmap.py b/integration_tests/test_contactmap.py index 74385ca220..091302a2f2 100644 --- a/integration_tests/test_contactmap.py +++ b/integration_tests/test_contactmap.py @@ -1,9 +1,11 @@ """Integration-test of the CONTact MAP module.""" + import os import pytest -import pytest_mock # noqa : F401 import tempfile from pathlib import Path +import glob + from haddock.libs.libontology import PDBFile from haddock.modules.analysis.contactmap import HaddockModule as CMapModule @@ -20,11 +22,11 @@ def contactmap(): order=0, path=Path(tmpdir), initial_params=CONTMAP_CONF, - ) + ) yield preset_contactmap -class MockPreviousIO(): +class MockPreviousIO: """A mocking class holding the retrieve_models method.""" def __init__(self, path): @@ -33,13 +35,17 @@ def __init__(self, path): def retrieve_models(individualize: bool = False): """Provide a set of models.""" models = [ - PDBFile(Path(GOLDEN_DATA, "contactmap_rigidbody_3_cltid_None.pdb"), - path=GOLDEN_DATA), - PDBFile(Path(GOLDEN_DATA, "contactmap_rigidbody_5_clt_1.pdb"), - path=GOLDEN_DATA), - PDBFile(Path(GOLDEN_DATA, "contactmap_rigidbody_7_clt_1.pdb"), - path=GOLDEN_DATA), - ] + PDBFile( + Path(GOLDEN_DATA, "contactmap_rigidbody_3_cltid_None.pdb"), + path=GOLDEN_DATA, + ), + PDBFile( + Path(GOLDEN_DATA, "contactmap_rigidbody_5_clt_1.pdb"), path=GOLDEN_DATA + ), + PDBFile( + Path(GOLDEN_DATA, "contactmap_rigidbody_7_clt_1.pdb"), path=GOLDEN_DATA + ), + ] # set models cluster ids models[0].clt_id = None models[1].clt_id = 1 @@ -49,7 +55,7 @@ def retrieve_models(individualize: bool = False): return models -def test_contactmap_example(contactmap, mocker): +def test_contactmap_example(contactmap, monkeypatch, mocker): """Test the contact map module run.""" # mock the previous_io behavior contactmap.previous_io = MockPreviousIO @@ -57,14 +63,19 @@ def test_contactmap_example(contactmap, mocker): mocker.patch( "haddock.modules.BaseHaddockModule.export_io_models", return_value=None, - ) + 
def test_contactmap_low_memory(contactmap, monkeypatch, mocker):
    """Check that the contact map module is skipped when memory is short.

    The memory helpers imported by the contactmap module are patched so the
    reported available memory (0.0) is lower than the reported requirement
    (1.0). After running, the module directory must contain no files
    matched by the ``*`` glob pattern.
    """
    # Patch targets mapped to the value each mocked callable returns.
    patched_returns = {
        "haddock.modules.BaseHaddockModule.export_io_models": None,
        "haddock.modules.analysis.contactmap.get_available_memory": 0.0,
        "haddock.modules.analysis.contactmap.get_necessary_memory": 1.0,
    }
    for target, returned in patched_returns.items():
        mocker.patch(target, return_value=returned)

    # Feed the module with the mocked upstream models
    contactmap.previous_io = MockPreviousIO

    # Work from inside the module directory, as the other test does
    monkeypatch.chdir(contactmap.path)

    # Run the module - should skip execution due to low memory
    contactmap.run()

    # Nothing may have been written in the module directory
    leftovers = glob.glob(f"{contactmap.path}/*")
    assert leftovers == []
# Number of bytes in one gigabyte, as used by both memory helpers below.
_BYTES_PER_GB = 1024**3

# Bytes assumed per cell of the (atoms x atoms) distance matrix.
_BYTES_PER_MATRIX_CELL = 8


def get_available_memory() -> float:
    """
    Get the available system memory in GB.

    Reads ``psutil.virtual_memory().available`` (a value in bytes) and
    divides it by ``1024**3`` to express it in gigabytes.

    Returns
    -------
    float
        Available memory in gigabytes (GB).
    """
    return psutil.virtual_memory().available / _BYTES_PER_GB


def get_necessary_memory(
    models: list,
    *,
    bytes_per_atom: int = 10,
    fallback_atoms: int = 10000,
) -> float:
    """
    Estimate the memory required to compute contact maps.

    The number of atoms is approximated from the on-disk size of the
    *first* model only (``file_size // bytes_per_atom``); the remaining
    models are not inspected, on the assumption that they are similar.
    The returned value is the size of a square ``atoms x atoms`` matrix
    holding 8 bytes per cell.

    When the size of the first model cannot be obtained (empty list,
    object without a usable ``file_name``, unreadable file) or when the
    estimated atom count is zero, `fallback_atoms` is used instead.

    Parameters
    ----------
    models : list
        List of model objects with file_name attribute.
    bytes_per_atom : int, optional
        Number of file bytes assumed to describe one atom. Default 10.
    fallback_atoms : int, optional
        Atom count used when no estimate can be derived from the file.
        Default 10000.

    Returns
    -------
    float
        Estimated memory requirement in gigabytes (GB).

    Raises
    ------
    ValueError
        If `bytes_per_atom` is lower than 1.
    """
    if bytes_per_atom < 1:
        raise ValueError(
            f"`bytes_per_atom` must be >= 1 (got {bytes_per_atom})."
        )

    # NOTE(review): this assumes `file_name` can be resolved from the
    # current working directory; if it cannot, the fallback below is
    # used silently - TODO confirm against the model class.
    # NOTE(review): fixed-width PDB ATOM records are ~80 characters, so
    # 10 bytes per atom presumably over-estimates on purpose - confirm.
    try:
        file_size = os.path.getsize(models[0].file_name)
    except (LookupError, AttributeError, TypeError, ValueError, OSError):
        # Only the failures expected from an empty list, a malformed
        # model object or a missing/unreadable file trigger the fallback.
        estimated_atoms = fallback_atoms
    else:
        # A file smaller than `bytes_per_atom` yields 0 atoms: use fallback
        estimated_atoms = file_size // bytes_per_atom or fallback_atoms

    matrix_size_bytes = estimated_atoms * estimated_atoms * _BYTES_PER_MATRIX_CELL
    # Convert it into GigaBytes
    return matrix_size_bytes / _BYTES_PER_GB
Changing this behaviour would require a total re-write of the module + # as of 04-2026 so instead we apply the following workaround: + # - Check what is the total size of the models (size is faster than reading) + # - Guesstimate how many atoms in total it would have based on the size + # - Calculate the expected matrix size and its memory requirements + # - Get how much memory the current host system has + # - If the system has less memory than needed, fail graciously + current_memory = get_available_memory() + needed_memory = get_necessary_memory(models) * self.params["ncores"] + if current_memory < needed_memory: + self.log( + msg=( + f"Not enough memory to execute `contactmap` " + f"(needs {needed_memory:.2f}Gb has {current_memory:.2f}Gb). " + "! Skipping this module !" + ), + level="warning", + ) + self.output_models = models + self.export_io_models() + return + + # ============================================================================== + # Obtain clusters clusters_sets = get_clusters_sets(models) @@ -92,7 +124,7 @@ def _run(self) -> None: Path(model.rel_path), Path(jobname), single_models_params, - ) + ) contact_jobs.append(contmap_job) # For clustered models @@ -106,7 +138,7 @@ def _run(self) -> None: [Path(model.rel_path) for model in clt_models], Path(name), self.params, - ) + ) contact_jobs.append(contmap_job) # Find execution engine diff --git a/tests/test_module_contmap.py b/tests/test_module_contmap.py index 5189cf4613..5f1011955e 100644 --- a/tests/test_module_contmap.py +++ b/tests/test_module_contmap.py @@ -11,8 +11,7 @@ from haddock.libs.libontology import PDBFile from haddock.modules.analysis.contactmap import DEFAULT_CONFIG -from haddock.modules.analysis.contactmap import \ - HaddockModule as ContactMapModule +from haddock.modules.analysis.contactmap import HaddockModule as ContactMapModule from haddock.modules.analysis.contactmap.contmap import ( PI, ClusteredContactMap, @@ -35,7 +34,11 @@ topX_models, within_2PI, write_res_contacts, - ) 
# Memory (in GB) expected when `get_necessary_memory` falls back to its
# default estimate: a 10000 x 10000 matrix holding 8 bytes per cell.
FALLBACK_MEMORY_GB = (10000 * 10000 * 8) / (1024**3)


def test_get_available_memory():
    """Test get_available_memory function."""
    memory = get_available_memory()
    # Should return a positive float (system memory)
    assert isinstance(memory, float)
    assert memory > 0


def test_get_necessary_memory():
    """Test get_necessary_memory function with valid models."""
    models = [
        PDBFile(Path(golden_data, "protprot_complex_1.pdb"), path=golden_data),
        PDBFile(Path(golden_data, "protprot_complex_2.pdb"), path=golden_data),
    ]
    memory = get_necessary_memory(models)
    # Should return a positive float
    assert isinstance(memory, float)
    assert memory > 0


def test_get_necessary_memory_empty_list():
    """Test get_necessary_memory with empty list."""
    memory = get_necessary_memory([])
    # Should fall back to default estimate, not merely be positive
    assert isinstance(memory, float)
    assert np.isclose(memory, FALLBACK_MEMORY_GB)


def test_get_necessary_memory_invalid_files():
    """Test get_necessary_memory with invalid file paths."""
    from unittest.mock import patch

    # Mock os.path.getsize to raise an exception
    with patch("os.path.getsize") as mock_getsize:
        mock_getsize.side_effect = FileNotFoundError("File not found")

        models = [
            PDBFile(Path(golden_data, "protprot_complex_1.pdb"), path=golden_data),
        ]
        memory = get_necessary_memory(models)
        # The failing size lookup must actually have been exercised
        assert mock_getsize.called

    # Should fall back to default estimate, not merely be positive
    assert isinstance(memory, float)
    assert np.isclose(memory, FALLBACK_MEMORY_GB)


def test_get_necessary_memory_zero_bytes():
    """Test get_necessary_memory with zero-byte files."""
    with tempfile.TemporaryDirectory() as tempdir:
        # Create a zero-byte file
        zero_file = Path(tempdir, "empty.pdb")
        zero_file.write_text("")

        models = [
            PDBFile(zero_file, path=str(zero_file.parent)),
        ]
        memory = get_necessary_memory(models)

    # Should use default fallback (10000 atoms)
    assert isinstance(memory, float)
    assert np.isclose(memory, FALLBACK_MEMORY_GB)