Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 80 additions & 2 deletions src/haddock/libs/libalign.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
* :py:func:`pdb2fastadic`
* :py:func:`get_atoms`
* :py:func:`get_align`
* :pt:func:`align_custom`
* :py:func:`align_struct`
* :py:func:`align_seq`
* :py:func:`make_range`
Expand Down Expand Up @@ -770,7 +771,7 @@ def pdb2fastadic(pdb_f: PDBPath) -> dict[str, dict[int, str]]:


def get_align(
method: str, lovoalign_exec: FilePath
method: str, lovoalign_exec: FilePath, alig_fname: Optional[FilePath] = None
) -> partial[dict[str, dict[int, int]]]:
"""
Get the alignment function.
Expand All @@ -783,15 +784,25 @@ def get_align(
lovoalign_exec : str
Path to the lovoalign executable.

alig_fname : str
Path to the custom alignment file, optional

Returns
-------
align_func : functools.partial
desired alignment function
"""
if method == "structure":
if alig_fname:
if not Path(alig_fname).exists():
raise FileNotFoundError(f"Custom alignment file {alig_fname} not found")
align_func = partial(align_custom, custom_alig_file = Path(alig_fname))

elif method == "structure":
align_func = partial(align_strct, lovoalign_exec=lovoalign_exec)

elif method == "sequence":
align_func = partial(align_seq)

else:
available_alns = ("sequence", "structure")
raise ValueError(
Expand All @@ -801,6 +812,73 @@ def get_align(
return align_func


def align_custom(
reference: PDBFile,
model: PDBFile,
output_path: FilePath,
custom_alig_file: FilePath,
) -> tuple[dict[str, dict[int, int]], dict[str, str]]:
"""
Parse custom alignment file to extract numbering relationship.

Parameters
----------
reference : :py:class:`haddock.libs.libontology.PDBFile`

model : :py:class:`haddock.libs.libontology.PDBFile`

output_path : Path

custom_alig_file : Path
Path to the custom .izone alignment file

Returns
-------
numbering_dic : dict
dictionary of residue mapping (one per chain)

model2ref_chain_dict : dict
dictionary of chain mapping

"""
numbering_dic: dict[str, dict[int, int]] = {}
model2ref_chain_dict: dict[str, str] = {}

# read align file, skip everything that is not 'ZONE'
with open(custom_alig_file, 'r') as f:
zone_lines = (line.strip() for line in f if line.strip().startswith('ZONE'))

for line in zone_lines:
if len(line.split()) <2:
log.warning(f"Unexpected {line} in {custom_alig_file}, skipping")
continue

mapping = line.split()[1]
if ':' not in mapping:
log.warning(f"Missing colon in mapping: {line}")
continue

# constructs dictionaries
try:
model_side, ref_side = mapping.split(':')
model_chain = model_side[0]
model_resid = int(model_side[1:])
ref_chain = ref_side[0]
ref_resid = int(ref_side[1:])

if ref_chain not in numbering_dic:
numbering_dic[ref_chain] = {}

numbering_dic[ref_chain][model_resid] = ref_resid
model2ref_chain_dict[model_chain] = ref_chain

except (ValueError, IndexError) as e:
log.warning(f"Could not parse line: {line} ({e})")
continue

return numbering_dic, model2ref_chain_dict


def align_strct(
reference: PDBFile,
model: PDBFile,
Expand Down
1 change: 1 addition & 0 deletions src/haddock/modules/analysis/caprieval/capri.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,7 @@ def run(self) -> Union[None, "CAPRI"]:
align_func = get_align(
method=self.params["alignment_method"],
lovoalign_exec=self.params["lovoalign_exec"],
alig_fname=self.params["alig_fname"],
)
self.model2ref_numbering, self.model2ref_chain_dict = align_func(
self.reference, self.model, self.path
Expand Down
12 changes: 12 additions & 0 deletions src/haddock/modules/analysis/caprieval/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,18 @@ sort_ascending:
group: analysis
explevel: easy

alig_fname:
default: ''
type: file
title: Alignment filename
short: Filename of the file containing alignment information, izone format.
long: Alignment file in izone format containing the mapping between residues. E.g. "ZONE A1:A5" means that
residue 1 of chain A of the model corresponds to residue 5 of chain A or the reference.
Note that if this parameter is provided, the 'alignment_method' parameter is ignored.
Note that all caprieval metrics will be calculated using exclusively the chains and residues f srom izone file.
group: analysis
explevel: expert

alignment_method:
default: sequence
type: string
Expand Down
88 changes: 88 additions & 0 deletions tests/golden_data/protdna_complex.izone
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# protein
ZONE A-1:A-1
ZONE A0:A0
ZONE A1:A1
ZONE A2:A2
ZONE A3:A3
ZONE A4:A4
ZONE A5:A5
ZONE A6:A6
ZONE A7:A7
ZONE A8:A8
ZONE A9:A9
ZONE A10:A10
ZONE A11:A11
ZONE A12:A12
ZONE A13:A13
ZONE A14:A14
ZONE A15:A15
ZONE A16:A16
ZONE A17:A17
ZONE A18:A18
ZONE A19:A19
ZONE A20:A20
ZONE A21:A21
ZONE A22:A22
ZONE A23:A23
ZONE A24:A24
ZONE A25:A25
ZONE A26:A26
ZONE A27:A27
ZONE A28:A28
ZONE A29:A29
ZONE A30:A30
ZONE A31:A31
ZONE A32:A32
ZONE A33:A33
ZONE A34:A34
ZONE A35:A35
ZONE A36:A36
ZONE A37:A37
ZONE A38:A38
ZONE A39:A39
ZONE A40:A40
ZONE A41:A41
ZONE A42:A42
ZONE A43:A43
ZONE A44:A44
ZONE A45:A45
ZONE A46:A46
ZONE A47:A47
ZONE A48:A48
ZONE A49:A49
ZONE A50:A50
ZONE A51:A51
ZONE A52:A52
ZONE A53:A53
ZONE A54:A54
ZONE A55:A55
ZONE A56:A56
ZONE A57:A57
ZONE A58:A58
ZONE A59:A59
ZONE A60:A60
ZONE A61:A61
ZONE A62:A62
# DNA
ZONE B1:B1
ZONE B2:B2
ZONE B3:B3
ZONE B4:B4
ZONE B5:B5
ZONE B6:B6
ZONE B7:B7
ZONE B8:B8
ZONE B9:B9
ZONE B10:B10
ZONE B11:B11
ZONE B28:B28
ZONE B29:B29
ZONE B30:B30
ZONE B31:B31
ZONE B32:B32
ZONE B33:B33
ZONE B34:B34
ZONE B35:B35
ZONE B36:B36
ZONE B37:B37
ZONE B38:B38
45 changes: 45 additions & 0 deletions tests/test_libalign.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
make_range,
pdb2fastadic,
rearrange_xyz_files,
align_custom,
)

from . import golden_data
Expand Down Expand Up @@ -567,3 +568,47 @@ def test_check_chains():
assert obs_r_chain == exp_r_chain
assert obs_l_chain == exp_l_chain

### new
def test_align_custom_basic():
"""Test basic custom alignment file parsing.
Use protdna_complex_1.pdb and protdna_complex_2.pdb and a 1:1
custom alignment file protdna_complex.izone
"""
ref = Path(golden_data, "protdna_complex_1.pdb")
mod = Path(golden_data, "protdna_complex_2.pdb")
custom_izone = Path(golden_data, "protdna_complex.izone")

observed_numb_dic, observed_chm_dict = align_custom(
reference=ref,
model=mod,
output_path=golden_data,
custom_alig_file=custom_izone
)

expected_chm_dict = {"A": "A", "B": "B"}

assert isinstance(observed_numb_dic, dict)
assert "A" in observed_numb_dic
assert "B" in observed_numb_dic
assert isinstance(observed_numb_dic["A"], dict)
assert isinstance(observed_numb_dic["B"], dict)
# protein has 64 residues, each should be in mapping
assert len(observed_numb_dic["A"]) == 64
# same for dna
assert len(observed_numb_dic["B"]) == 22
# several random residues
assert observed_numb_dic["A"][-1] == -1
assert observed_numb_dic["B"][38] == 38
assert isinstance(observed_chm_dict, dict)
assert observed_chm_dict == expected_chm_dict

# all mapped residues are integers
for chain, mappings in observed_numb_dic.items():
for model_res, ref_res in mappings.items():
assert isinstance(model_res, int)
assert isinstance(ref_res, int)

# all mapped chains are strings
for model_chain, ref_chain in observed_chm_dict.items():
assert isinstance(model_chain, str)
assert isinstance(ref_chain, str)
1 change: 1 addition & 0 deletions tests/test_module_caprieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -855,6 +855,7 @@ def test_capri_run(mocker, monkeypatch):
"fnat_cutoff": 10,
"irmsd_cutoff": 10,
"keep_hetatm": False,
"alig_fname": None,
},
)
rand_fnat = random.random()
Expand Down