haddocking · AnnaKravchenko · Oct 14, 2025 · Oct 14, 2025 · Oct 14, 2025 · Oct 14, 2025
diff --git a/src/haddock/libs/libalign.py b/src/haddock/libs/libalign.py
@@ -11,6 +11,7 @@
 * :py:func:`pdb2fastadic`
 * :py:func:`get_atoms`
 * :py:func:`get_align`
+* :pt:func:`align_custom`
 * :py:func:`align_struct`
 * :py:func:`align_seq`
 * :py:func:`make_range`
@@ -770,7 +771,7 @@ def pdb2fastadic(pdb_f: PDBPath) -> dict[str, dict[int, str]]:
 
 
 def get_align(
-    method: str, lovoalign_exec: FilePath
+    method: str, lovoalign_exec: FilePath, alig_fname: Optional[FilePath] = None
 ) -> partial[dict[str, dict[int, int]]]:
     """
     Get the alignment function.
@@ -783,15 +784,25 @@ def get_align(
     lovoalign_exec : str
         Path to the lovoalign executable.
 
+    alig_fname : str
+        Path to the custom alignment file, optional
+
     Returns
     -------
     align_func : functools.partial
         desired alignment function
     """
-    if method == "structure":
+    if alig_fname:
+        if not Path(alig_fname).exists():
+            raise FileNotFoundError(f"Custom alignment file {alig_fname} not found")
+        align_func = partial(align_custom, custom_alig_file = Path(alig_fname))
+
+    elif method == "structure":
         align_func = partial(align_strct, lovoalign_exec=lovoalign_exec)
+
     elif method == "sequence":
         align_func = partial(align_seq)
+
     else:
         available_alns = ("sequence", "structure")
         raise ValueError(
@@ -801,6 +812,73 @@ def get_align(
     return align_func
 
 
+def align_custom(
+    reference: PDBFile,
+    model: PDBFile,
+    output_path: FilePath,
+    custom_alig_file: FilePath,
+) -> tuple[dict[str, dict[int, int]], dict[str, str]]: 
+    """
+    Parse custom alignment file to extract numbering relationship.
+
+        Parameters
+    ----------
+    reference : :py:class:`haddock.libs.libontology.PDBFile`
+
+    model : :py:class:`haddock.libs.libontology.PDBFile`
+
+    output_path : Path
+
+    custom_alig_file : Path 
+        Path to the custom .izone alignment file
+
+    Returns
+    -------
+    numbering_dic : dict
+        dictionary of residue mapping (one per chain)
+
+    model2ref_chain_dict : dict 
+        dictionary of chain mapping
+
+    """
+    numbering_dic: dict[str, dict[int, int]] = {}
+    model2ref_chain_dict: dict[str, str] = {}
+
+    # read align file, skip everything that is not 'ZONE'
+    with open(custom_alig_file, 'r') as f:
+        zone_lines = (line.strip() for line in f if line.strip().startswith('ZONE'))
+
+        for line in zone_lines:
+            if len(line.split()) <2:
+                log.warning(f"Unexpected {line} in {custom_alig_file}, skipping")
+                continue
+
+            mapping = line.split()[1]
+            if ':' not in mapping:
+                log.warning(f"Missing colon in mapping: {line}")
+                continue
+
+            # constructs dictionaries
+            try:
+                model_side, ref_side = mapping.split(':')
+                model_chain = model_side[0]
+                model_resid = int(model_side[1:])
+                ref_chain = ref_side[0]
+                ref_resid = int(ref_side[1:])
+
+                if ref_chain not in numbering_dic:
+                    numbering_dic[ref_chain] = {}
+
+                numbering_dic[ref_chain][model_resid] = ref_resid
+                model2ref_chain_dict[model_chain] = ref_chain
+
+            except (ValueError, IndexError) as e:
+                log.warning(f"Could not parse line: {line} ({e})")
+                continue
+
+    return numbering_dic, model2ref_chain_dict
+
+
 def align_strct(
     reference: PDBFile,
     model: PDBFile,

diff --git a/src/haddock/modules/analysis/caprieval/capri.py b/src/haddock/modules/analysis/caprieval/capri.py
@@ -612,6 +612,7 @@ def run(self) -> Union[None, "CAPRI"]:
             align_func = get_align(
                 method=self.params["alignment_method"],
                 lovoalign_exec=self.params["lovoalign_exec"],
+                alig_fname=self.params["alig_fname"],
             )
             self.model2ref_numbering, self.model2ref_chain_dict = align_func(
                 self.reference, self.model, self.path

diff --git a/src/haddock/modules/analysis/caprieval/defaults.yaml b/src/haddock/modules/analysis/caprieval/defaults.yaml
@@ -151,6 +151,18 @@ sort_ascending:
   group: analysis
   explevel: easy
 
+alig_fname:
+  default: ''
+  type: file
+  title: Alignment filename
+  short: Filename of the file containing alignment information, izone format. 
+  long: Alignment file in izone format containing the mapping between residues. E.g. "ZONE A1:A5" means that 
+    residue 1 of chain A of the model corresponds to residue 5 of chain A or the reference. 
+    Note that if this parameter is provided, the 'alignment_method' parameter is ignored. 
+    Note that all caprieval metrics will be calculated using exclusively the chains and residues f srom izone file. 
+  group: analysis
+  explevel: expert
+
 alignment_method:
   default: sequence
   type: string

diff --git a/tests/golden_data/protdna_complex.izone b/tests/golden_data/protdna_complex.izone
@@ -0,0 +1,88 @@
+# protein
+ZONE A-1:A-1
+ZONE A0:A0
+ZONE A1:A1
+ZONE A2:A2
+ZONE A3:A3
+ZONE A4:A4
+ZONE A5:A5
+ZONE A6:A6
+ZONE A7:A7
+ZONE A8:A8
+ZONE A9:A9
+ZONE A10:A10
+ZONE A11:A11
+ZONE A12:A12
+ZONE A13:A13
+ZONE A14:A14
+ZONE A15:A15
+ZONE A16:A16
+ZONE A17:A17
+ZONE A18:A18
+ZONE A19:A19
+ZONE A20:A20
+ZONE A21:A21
+ZONE A22:A22
+ZONE A23:A23
+ZONE A24:A24
+ZONE A25:A25
+ZONE A26:A26
+ZONE A27:A27
+ZONE A28:A28
+ZONE A29:A29
+ZONE A30:A30
+ZONE A31:A31
+ZONE A32:A32
+ZONE A33:A33
+ZONE A34:A34
+ZONE A35:A35
+ZONE A36:A36
+ZONE A37:A37
+ZONE A38:A38
+ZONE A39:A39
+ZONE A40:A40
+ZONE A41:A41
+ZONE A42:A42
+ZONE A43:A43
+ZONE A44:A44
+ZONE A45:A45
+ZONE A46:A46
+ZONE A47:A47
+ZONE A48:A48
+ZONE A49:A49
+ZONE A50:A50
+ZONE A51:A51
+ZONE A52:A52
+ZONE A53:A53
+ZONE A54:A54
+ZONE A55:A55
+ZONE A56:A56
+ZONE A57:A57
+ZONE A58:A58
+ZONE A59:A59
+ZONE A60:A60
+ZONE A61:A61
+ZONE A62:A62
+# DNA 
+ZONE B1:B1
+ZONE B2:B2
+ZONE B3:B3
+ZONE B4:B4
+ZONE B5:B5
+ZONE B6:B6
+ZONE B7:B7
+ZONE B8:B8
+ZONE B9:B9
+ZONE B10:B10
+ZONE B11:B11
+ZONE B28:B28
+ZONE B29:B29
+ZONE B30:B30
+ZONE B31:B31
+ZONE B32:B32
+ZONE B33:B33
+ZONE B34:B34
+ZONE B35:B35
+ZONE B36:B36
+ZONE B37:B37
+ZONE B38:B38
diff --git a/tests/test_libalign.py b/tests/test_libalign.py
@@ -22,6 +22,7 @@
     make_range,
     pdb2fastadic,
     rearrange_xyz_files,
+    align_custom,
     )
 
 from . import golden_data
@@ -567,3 +568,47 @@ def test_check_chains():
         assert obs_r_chain == exp_r_chain
         assert obs_l_chain == exp_l_chain
 
+### new
+def test_align_custom_basic():
+    """Test basic custom alignment file parsing.
+    Use protdna_complex_1.pdb and protdna_complex_2.pdb and a 1:1
+    custom alignment file protdna_complex.izone
+    """
+    ref = Path(golden_data, "protdna_complex_1.pdb")
+    mod = Path(golden_data, "protdna_complex_2.pdb")
+    custom_izone = Path(golden_data, "protdna_complex.izone")
+
+    observed_numb_dic, observed_chm_dict = align_custom(
+        reference=ref,
+        model=mod,
+        output_path=golden_data,
+        custom_alig_file=custom_izone
+    )
+
+    expected_chm_dict = {"A": "A", "B": "B"}
+
+    assert isinstance(observed_numb_dic, dict)
+    assert "A" in observed_numb_dic
+    assert "B" in observed_numb_dic
+    assert isinstance(observed_numb_dic["A"], dict)
+    assert isinstance(observed_numb_dic["B"], dict)
+    # protein has 64 residues, each should be in mapping
+    assert len(observed_numb_dic["A"]) == 64
+    # same for dna
+    assert len(observed_numb_dic["B"]) == 22
+    # several random residues 
+    assert observed_numb_dic["A"][-1] == -1
+    assert observed_numb_dic["B"][38] == 38
+    assert isinstance(observed_chm_dict, dict)
+    assert observed_chm_dict == expected_chm_dict
+
+    # all mapped residues are integers
+    for chain, mappings in observed_numb_dic.items():
+        for model_res, ref_res in mappings.items():
+            assert isinstance(model_res, int)
+            assert isinstance(ref_res, int)
+
+    # all mapped chains are strings
+    for model_chain, ref_chain in observed_chm_dict.items():
+        assert isinstance(model_chain, str)
+        assert isinstance(ref_chain, str)
diff --git a/tests/test_module_caprieval.py b/tests/test_module_caprieval.py
@@ -855,6 +855,7 @@ def test_capri_run(mocker, monkeypatch):
                 "fnat_cutoff": 10,
                 "irmsd_cutoff": 10,
                 "keep_hetatm": False,
+                "alig_fname": None,
             },
         )
         rand_fnat = random.random()