"""
Traces back PDB files from a HADDOCK run directory.

Given an input run directory, haddock3-traceback traces back each model to the
initial input molecules used, providing the rank of each intermediate model.

USAGE::

    haddock3-traceback -r <run_dir>

"""
import argparse
import sys
from pathlib import Path

import numpy as np
import pandas as pd

from haddock import log
from haddock.libs import libcli
from haddock.libs.libontology import ModuleIO, PDBFile
from haddock.modules import get_module_steps_folders


TRACK_FOLDER = "traceback"  # name of the traceback folder

# step folders whose module name is listed here are not traced back
ANA_MODULES = ["caprieval",
               "seletop",
               "topoaa",
               "rmsdmatrix",
               "clustrmsd",
               "clustfcc"]

_MISSING = "-"  # placeholder written where a name or a rank is not available
_UNK_PREFIX = "unk"  # prefix of the keys given to models discarded later on


def get_ori_names(n: int, pdbfile: PDBFile, max_topo_len: int):
    """
    Get the original name(s) of the PDB file.

    Parameters
    ----------
    n : int
        Step number.
    pdbfile : PDBFile
        PDBFile object.
    max_topo_len : int
        Maximum length of the topologies found so far.

    Returns
    -------
    ori_names : list
        List of original names. For ``n != 0`` it holds the single
        ``ori_name`` of the model ("-" when it is not defined), for ``n == 0``
        it holds the file name of each topology.
    max_topo_len : int
        Maximum length of the topologies found so far.
    """
    if n != 0:  # not the first step, ori_name should be defined
        ori_name = getattr(pdbfile, "ori_name", None)
        # an undefined origin is reported with the placeholder instead of
        # leaking a None into the traceback table
        return [_MISSING if ori_name is None else ori_name], max_topo_len

    # first step, we get topology files instead of ori_name.
    # topology can either be a list of topologies or a single topology
    topology = pdbfile.topology
    if isinstance(topology, list):
        ori_names = [el.file_name for el in topology]
    else:
        ori_names = [topology.file_name]
    # never shrink the maximum: a model with a single topology must not
    # discard the larger number of topologies seen for previous models
    return ori_names, max(max_topo_len, len(ori_names))


def _rank_by_score(scores):
    """Return the 1-based rank of each score, the lowest score being 1."""
    order = np.argsort(scores)
    ranks = np.empty(len(order), dtype=int)
    # inverse permutation: the model at position order[k] has rank k + 1
    ranks[order] = np.arange(1, len(order) + 1)
    return ranks.tolist()


def _records_to_frame(records: dict, columns: list):
    """
    Convert a ``{key: list_of_values}`` dictionary into a named dataframe.

    The key becomes the first column. Records shorter than the number of
    expected values are padded with missing values, while longer records
    raise a ValueError, as no column name is available for them.
    """
    n_values = len(columns) - 1
    frame = pd.DataFrame.from_dict(records, orient="index")
    if frame.shape[1] > n_values:
        raise ValueError(
            f"Records hold up to {frame.shape[1]} values, but only "
            f"{n_values} are expected ({', '.join(columns[1:])})."
            )
    frame = frame.reindex(columns=range(n_values)).reset_index()
    frame.columns = columns
    return frame


def traceback_dataframe(data_dict: dict,
                        rank_dict: dict,
                        sel_step: list,
                        max_topo_len: int):
    """
    Create traceback dataframe by combining together ranks and data.

    Parameters
    ----------
    data_dict : dict
        Dictionary containing the data to be traced back. Each value lists
        the names of a model going backwards in the workflow, followed by
        the topology names.
    rank_dict : dict
        Dictionary containing the ranks of the data to be traced back.
    sel_step : list
        List of selected steps.
    max_topo_len : int
        Maximum length of the topologies.

    Returns
    -------
    df_ord : pandas.DataFrame
        Dataframe containing the traceback data. The topology columns come
        first, followed by the name and rank columns of each step, in the
        order given by ``sel_step``.

    Raises
    ------
    ValueError
        If a record holds more values than the available columns.
    """
    # get last step of the workflow
    last_step = sel_step[-1]
    backwards = list(reversed(sel_step))
    topo_cols = [f"00_topo{i + 1}" for i in range(max_topo_len)]

    # data and ranks dictionaries to dataframes; last_step is the merge key
    df_data = _records_to_frame(data_dict, backwards + topo_cols)
    rank_cols = [last_step] + [f"{el}_rank" for el in backwards]
    df_ranks = _records_to_frame(rank_dict, rank_cols)

    # merging the data and ranks dataframes
    df_merged = pd.merge(df_data, df_ranks, on=last_step)

    # explicit workflow order: a lexicographic sort of the column names
    # would place e.g. "10_flexref" before "1_rigidbody"
    ordered_cols = list(topo_cols)
    for step in sel_step:
        ordered_cols.extend([step, f"{step}_rank"])
    # work on a copy, so that the assignment below does not act on a view
    df_ord = df_merged[ordered_cols].copy()

    # last thing: substituting unk records with - in the last step
    unk_records = df_ord[last_step].astype(str).str.startswith(_UNK_PREFIX)
    df_ord.loc[unk_records, last_step] = _MISSING
    return df_ord


# Command line interface parser
ap = argparse.ArgumentParser(
    prog="haddock3-traceback",
    description=__doc__,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    )

libcli.add_rundir_arg(ap)


def load_args(ap):
    """Load argument parser args."""
    return ap.parse_args()


def cli(ap, main):
    """Command-line interface entry point."""
    cmd = vars(load_args(ap))
    main(**cmd)


def maincli():
    """Execute main client."""
    cli(ap, main)


def _prefix_chain_tails(data_dict: dict, step: str):
    """
    Complement the last name of each chain with the folder of ``step``.

    ``data_dict`` is modified in place. The returned dictionary maps each
    complemented name to the list of keys whose chain ends with it, so that
    all the models derived from the same file can be retrieved.
    """
    tails = {}
    for key, chain in data_dict.items():
        if chain and chain[-1] != _MISSING:
            chain[-1] = f"../{step}/{chain[-1]}"
            tails.setdefault(chain[-1], []).append(key)
    return tails


def main(run_dir):
    """
    Traceback CLI.

    Reads the ``io.json`` file of each traceable step of the run directory,
    going from the last step to the first one, and writes the resulting
    table to ``<run_dir>/traceback/traceback.tsv``. Nothing is written if
    the run directory contains no step to trace back.

    Parameters
    ----------
    run_dir : str or Path
        Path to the original run directory.
    """
    log.level = 20  # 20 is the numeric value of logging.INFO
    log.info(f"Running haddock3-traceback on {run_dir}")

    outdir = Path(run_dir, TRACK_FOLDER)
    try:
        outdir.mkdir(exist_ok=False)
        log.info(f"Created directory: {str(outdir.resolve())}")
    except FileExistsError:
        log.warning(f"Directory {str(outdir.resolve())} already exists.")

    # Reading steps
    log.info("Reading input run directory")
    # get the module folders from the run_dir input
    all_steps = get_module_steps_folders(Path(run_dir))
    log.info(f"All_steps: {', '.join(all_steps)}")
    sel_step = [st for st in all_steps if st.split("_")[1] not in ANA_MODULES]
    log.info(f"Steps to trace back: {', '.join(sel_step)}")
    if not sel_step:
        # e.g. a workflow made of analysis modules only
        log.warning("No step to trace back, traceback file not created.")
        return

    data_dict, rank_dict = {}, {}
    unk_idx, max_topo_len = 0, 0
    last_idx = len(sel_step) - 1
    # this cycle goes through the steps in reverse order
    for n in range(last_idx, -1, -1):
        step = sel_step[n]
        log.info(f"Tracing back step {step}")
        delta = last_idx - n  # how many steps have we gone back?
        # correcting names in the dictionary. The ori_name must be
        # complemented with the step folder name
        tails = _prefix_chain_tails(data_dict, step)

        # loading the .json file
        io = ModuleIO()
        io.load(Path(run_dir, step, "io.json"))
        # getting the ranks for the current step folder
        ranks = _rank_by_score([pdbfile.score for pdbfile in io.output])

        # iterating through the pdbfiles to fill data_dict and rank_dict
        for pdbfile, rank in zip(io.output, ranks):
            # getting the original names
            ori_names, max_topo_len = get_ori_names(n, pdbfile, max_topo_len)
            rel_path = str(pdbfile.rel_path)

            if n == last_idx:  # last step of the workflow
                data_dict[rel_path] = list(ori_names)
                rank_dict[rel_path] = [rank]
                continue

            keys = tails.get(rel_path)
            if keys is None:
                # this is the first step in which the pdbfile appears.
                # This means that it was discarded for the subsequent steps
                # We need to add the pdbfile to the data_dict
                key = f"{_UNK_PREFIX}{unk_idx}"
                unk_idx += 1
                data_dict[key] = [_MISSING] * (delta - 1) + [rel_path]
                rank_dict[key] = [_MISSING] * delta
                keys = [key]

            # assignment: every chain ending with this pdbfile is extended
            for key in keys:
                data_dict[key].extend(ori_names)
                rank_dict[key].append(rank)

    # stripping away relative paths
    final_data_dict = {
        key.split("/")[-1]: [el.split("/")[-1] for el in chain]
        for key, chain in data_dict.items()
        }
    final_rank_dict = {
        key.split("/")[-1]: ranks for key, ranks in rank_dict.items()
        }
    # dumping the data into a dataframe
    df_output = traceback_dataframe(final_data_dict,
                                    final_rank_dict,
                                    sel_step,
                                    max_topo_len)
    # dumping the dataframe
    track_filename = Path(run_dir, TRACK_FOLDER, "traceback.tsv")
    df_output.to_csv(track_filename, sep="\t", index=False)
    log.info(f"Output dataframe {track_filename} "
             f"created with shape {df_output.shape}")
    return


if __name__ == "__main__":
    sys.exit(maincli())
import clean_output from haddock.gear.config import get_module_name @@ -62,6 +63,8 @@ def postprocess(self): capri_steps.append(step.order) # call cli_analyse (no need for capri_dicts, it's all precalculated) cli_analyse("./", capri_steps, top_cluster=10, format=None, scale=None) + # call cli_traceback + cli_traceback("./") class Workflow: diff --git a/src/haddock/modules/analysis/seletopclusts/__init__.py b/src/haddock/modules/analysis/seletopclusts/__init__.py index e605759b1..c552ad6c6 100644 --- a/src/haddock/modules/analysis/seletopclusts/__init__.py +++ b/src/haddock/modules/analysis/seletopclusts/__init__.py @@ -87,6 +87,7 @@ def _run(self): # changing attributes name_path = Path(name) name_path.write_text(model.rel_path.read_text()) + model.ori_name = model.file_name model.file_name = name model.full_name = name model.rel_path = Path('..', Path(self.path).name, name) diff --git a/src/haddock/modules/refinement/emref/__init__.py b/src/haddock/modules/refinement/emref/__init__.py index 8a22eec59..36b4f0e4a 100644 --- a/src/haddock/modules/refinement/emref/__init__.py +++ b/src/haddock/modules/refinement/emref/__init__.py @@ -85,6 +85,10 @@ def _run(self): model, idx, ".", "emref" ) expected_pdb.restr_fname = ambig_fname + try: + expected_pdb.ori_name = model.file_name + except AttributeError: + expected_pdb.ori_name = None self.output_models.append(expected_pdb) job = CNSJob(inp_file, out_file, envvars=self.envvars) diff --git a/src/haddock/modules/refinement/flexref/__init__.py b/src/haddock/modules/refinement/flexref/__init__.py index cab2602d3..2a4627fd8 100644 --- a/src/haddock/modules/refinement/flexref/__init__.py +++ b/src/haddock/modules/refinement/flexref/__init__.py @@ -87,6 +87,10 @@ def _run(self): model, idx, ".", "flexref" ) expected_pdb.restr_fname = ambig_fname + try: + expected_pdb.ori_name = model.file_name + except AttributeError: + expected_pdb.ori_name = None self.output_models.append(expected_pdb) job = CNSJob(inp_file, out_file, 
envvars=self.envvars) diff --git a/src/haddock/modules/refinement/mdref/__init__.py b/src/haddock/modules/refinement/mdref/__init__.py index fa8473124..9bb07bc2c 100644 --- a/src/haddock/modules/refinement/mdref/__init__.py +++ b/src/haddock/modules/refinement/mdref/__init__.py @@ -85,6 +85,10 @@ def _run(self): model, idx, ".", "mdref" ) expected_pdb.restr_fname = ambig_fname + try: + expected_pdb.ori_name = model.file_name + except AttributeError: + expected_pdb.ori_name = None self.output_models.append(expected_pdb) job = CNSJob(inp_file, out_file, envvars=self.envvars) diff --git a/tests/golden_data/io_flexref.json b/tests/golden_data/io_flexref.json new file mode 100644 index 000000000..d5f6ed570 --- /dev/null +++ b/tests/golden_data/io_flexref.json @@ -0,0 +1,203 @@ +{ + "input": [], + "output": [ + { + "clt_id": null, + "clt_model_rank": null, + "clt_rank": null, + "created": "2023-06-26 11:15:03", + "file_name": "flexref_1.pdb", + "file_type": { + "py/reduce": [ + { + "py/type": "haddock.libs.libontology.Format" + }, + { + "py/tuple": [ + "pdb" + ] + } + ] + }, + "full_name": "flexref_1.pdb", + "len": NaN, + "md5": null, + "ori_name": "rigidbody_2.pdb", + "path": "/trinity/login/mgiulini/haddock3/examples/docking-antibody-antigen/run1-CDR-NMR-CSP-test-new/5_flexref", + "py/object": "haddock.libs.libontology.PDBFile", + "rel_path": { + "py/reduce": [ + { + "py/type": "pathlib.PosixPath" + }, + { + "py/tuple": [ + "..", + "5_flexref", + "flexref_1.pdb" + ] + } + ] + }, + "restr_fname": { + "py/reduce": [ + { + "py/type": "pathlib.PosixPath" + }, + { + "py/tuple": [ + "..", + "data", + "5_flexref", + "ambig-CDR-NMR-CSP.tbl" + ] + } + ] + }, + "score": -242.31202000000002, + "topology": [ + { + "created": "2023-06-26 11:14:32", + "file_name": "4G6K_fv_haddock.psf", + "file_type": { + "py/reduce": [ + { + "py/type": "haddock.libs.libontology.Format" + }, + { + "py/tuple": [ + "psf" + ] + } + ] + }, + "full_name": "4G6K_fv_haddock.psf", + "md5": null, + "path": 
"/trinity/login/mgiulini/haddock3/examples/docking-antibody-antigen/run1-CDR-NMR-CSP-test-new/0_topoaa", + "py/object": "haddock.libs.libontology.TopologyFile", + "rel_path": { + "py/reduce": [ + { + "py/type": "pathlib.PosixPath" + }, + { + "py/tuple": [ + "..", + "0_topoaa", + "4G6K_fv_haddock.psf" + ] + } + ] + }, + "restr_fname": null + }, + { + "created": "2023-06-26 11:14:32", + "file_name": "4I1B-matched_haddock.psf", + "file_type": { + "py/id": 9 + }, + "full_name": "4I1B-matched_haddock.psf", + "md5": null, + "path": "/trinity/login/mgiulini/haddock3/examples/docking-antibody-antigen/run1-CDR-NMR-CSP-test-new/0_topoaa", + "py/object": "haddock.libs.libontology.TopologyFile", + "rel_path": { + "py/reduce": [ + { + "py/type": "pathlib.PosixPath" + }, + { + "py/tuple": [ + "..", + "0_topoaa", + "4I1B-matched_haddock.psf" + ] + } + ] + }, + "restr_fname": null + } + ], + "unw_energies": { + "air": 117.512, + "angles": 0.0, + "bonds": 0.0, + "bsa": 1363.37, + "cdih": 0.0, + "coup": 0.0, + "dani": 0.0, + "desolv": 11.779, + "dihe": 0.0, + "elec": -244.859, + "improper": 0.0, + "rdcs": 0.0, + "rg": 0.0, + "total": -134.696, + "vdw": -7.34952, + "vean": 0.0, + "xpcs": 0.0 + } + }, + { + "clt_id": null, + "clt_model_rank": null, + "clt_rank": null, + "created": "2023-06-26 11:15:03", + "file_name": "flexref_2.pdb", + "file_type": { + "py/id": 4 + }, + "full_name": "flexref_2.pdb", + "len": NaN, + "md5": null, + "ori_name": "rigidbody_4.pdb", + "path": "/trinity/login/mgiulini/haddock3/examples/docking-antibody-antigen/run1-CDR-NMR-CSP-test-new/5_flexref", + "py/object": "haddock.libs.libontology.PDBFile", + "rel_path": { + "py/reduce": [ + { + "py/type": "pathlib.PosixPath" + }, + { + "py/tuple": [ + "..", + "5_flexref", + "flexref_2.pdb" + ] + } + ] + }, + "restr_fname": { + "py/id": 6 + }, + "score": -191.67547, + "topology": [ + { + "py/id": 8 + }, + { + "py/id": 11 + } + ], + "unw_energies": { + "air": 180.926, + "angles": 0.0, + "bonds": 0.0, + "bsa": 1475.06, 
+ "cdih": 0.0, + "coup": 0.0, + "dani": 0.0, + "desolv": 2.32073, + "dihe": 0.0, + "elec": -175.102, + "improper": 0.0, + "rdcs": 0.0, + "rg": 0.0, + "total": -16.4121, + "vdw": -22.2362, + "vean": 0.0, + "xpcs": 0.0 + } + } + ] +} \ No newline at end of file diff --git a/tests/golden_data/io_rigid.json b/tests/golden_data/io_rigid.json new file mode 100644 index 000000000..57452ab39 --- /dev/null +++ b/tests/golden_data/io_rigid.json @@ -0,0 +1,325 @@ +{ + "input": [], + "output": [ + { + "clt_id": null, + "clt_model_rank": null, + "clt_rank": null, + "created": "2023-06-26 11:14:34", + "file_name": "rigidbody_1.pdb", + "file_type": { + "py/reduce": [ + { + "py/type": "haddock.libs.libontology.Format" + }, + { + "py/tuple": [ + "pdb" + ] + } + ] + }, + "full_name": "1_rigidbody/rigidbody_1.pdb", + "len": NaN, + "md5": null, + "ori_name": null, + "path": "/trinity/login/mgiulini/haddock3/examples/docking-antibody-antigen/run1-CDR-NMR-CSP-test-new/1_rigidbody/1_rigidbody", + "py/object": "haddock.libs.libontology.PDBFile", + "rel_path": { + "py/reduce": [ + { + "py/type": "pathlib.PosixPath" + }, + { + "py/tuple": [ + "..", + "1_rigidbody", + "rigidbody_1.pdb" + ] + } + ] + }, + "restr_fname": { + "py/reduce": [ + { + "py/type": "pathlib.PosixPath" + }, + { + "py/tuple": [ + "..", + "data", + "1_rigidbody", + "ambig-CDR-NMR-CSP.tbl" + ] + } + ] + }, + "score": 9.95317401, + "topology": [ + { + "created": "2023-06-26 11:14:32", + "file_name": "4G6K_fv_haddock.psf", + "file_type": { + "py/reduce": [ + { + "py/type": "haddock.libs.libontology.Format" + }, + { + "py/tuple": [ + "psf" + ] + } + ] + }, + "full_name": "4G6K_fv_haddock.psf", + "md5": null, + "path": "/trinity/login/mgiulini/haddock3/examples/docking-antibody-antigen/run1-CDR-NMR-CSP-test-new/0_topoaa", + "py/object": "haddock.libs.libontology.TopologyFile", + "rel_path": { + "py/reduce": [ + { + "py/type": "pathlib.PosixPath" + }, + { + "py/tuple": [ + "..", + "0_topoaa", + "4G6K_fv_haddock.psf" + ] + } + ] 
+ }, + "restr_fname": null + }, + { + "created": "2023-06-26 11:14:32", + "file_name": "4I1B-matched_haddock.psf", + "file_type": { + "py/id": 9 + }, + "full_name": "4I1B-matched_haddock.psf", + "md5": null, + "path": "/trinity/login/mgiulini/haddock3/examples/docking-antibody-antigen/run1-CDR-NMR-CSP-test-new/0_topoaa", + "py/object": "haddock.libs.libontology.TopologyFile", + "rel_path": { + "py/reduce": [ + { + "py/type": "pathlib.PosixPath" + }, + { + "py/tuple": [ + "..", + "0_topoaa", + "4I1B-matched_haddock.psf" + ] + } + ] + }, + "restr_fname": null + } + ], + "unw_energies": { + "air": 1443.5, + "angles": 0.0, + "bonds": 0.0, + "bsa": 1413.94, + "cdih": 0.0, + "coup": 0.0, + "dani": 0.0, + "desolv": 14.0989, + "dihe": 0.0, + "elec": -4.44414, + "improper": 0.0, + "rdcs": 0.0, + "rg": 0.0, + "total": 1439.34, + "vdw": 0.281401, + "vean": 0.0, + "xpcs": 0.0 + } + }, + { + "clt_id": null, + "clt_model_rank": null, + "clt_rank": null, + "created": "2023-06-26 11:14:34", + "file_name": "rigidbody_2.pdb", + "file_type": { + "py/id": 4 + }, + "full_name": "1_rigidbody/rigidbody_2.pdb", + "len": NaN, + "md5": null, + "ori_name": null, + "path": "/trinity/login/mgiulini/haddock3/examples/docking-antibody-antigen/run1-CDR-NMR-CSP-test-new/1_rigidbody/1_rigidbody", + "py/object": "haddock.libs.libontology.PDBFile", + "rel_path": { + "py/reduce": [ + { + "py/type": "pathlib.PosixPath" + }, + { + "py/tuple": [ + "..", + "1_rigidbody", + "rigidbody_2.pdb" + ] + } + ] + }, + "restr_fname": { + "py/id": 6 + }, + "score": 0.7690929999999998, + "topology": [ + { + "py/id": 8 + }, + { + "py/id": 11 + } + ], + "unw_energies": { + "air": 853.966, + "angles": 0.0, + "bonds": 0.0, + "bsa": 1193.51, + "cdih": 0.0, + "coup": 0.0, + "dani": 0.0, + "desolv": 11.3613, + "dihe": 0.0, + "elec": -7.52969, + "improper": 0.0, + "rdcs": 0.0, + "rg": 0.0, + "total": 879.729, + "vdw": 33.2923, + "vean": 0.0, + "xpcs": 0.0 + } + }, + { + "clt_id": null, + "clt_model_rank": null, + "clt_rank": 
null, + "created": "2023-06-26 11:14:34", + "file_name": "rigidbody_3.pdb", + "file_type": { + "py/id": 4 + }, + "full_name": "1_rigidbody/rigidbody_3.pdb", + "len": NaN, + "md5": null, + "ori_name": null, + "path": "/trinity/login/mgiulini/haddock3/examples/docking-antibody-antigen/run1-CDR-NMR-CSP-test-new/1_rigidbody/1_rigidbody", + "py/object": "haddock.libs.libontology.PDBFile", + "rel_path": { + "py/reduce": [ + { + "py/type": "pathlib.PosixPath" + }, + { + "py/tuple": [ + "..", + "1_rigidbody", + "rigidbody_3.pdb" + ] + } + ] + }, + "restr_fname": { + "py/id": 6 + }, + "score": 19.477193999999997, + "topology": [ + { + "py/id": 8 + }, + { + "py/id": 11 + } + ], + "unw_energies": { + "air": 1968.47, + "angles": 0.0, + "bonds": 0.0, + "bsa": 1062.13, + "cdih": 0.0, + "coup": 0.0, + "dani": 0.0, + "desolv": 12.9516, + "dihe": 0.0, + "elec": -2.87223, + "improper": 0.0, + "rdcs": 0.0, + "rg": 0.0, + "total": 1999.04, + "vdw": 33.4424, + "vean": 0.0, + "xpcs": 0.0 + } + }, + { + "clt_id": null, + "clt_model_rank": null, + "clt_rank": null, + "created": "2023-06-26 11:14:34", + "file_name": "rigidbody_4.pdb", + "file_type": { + "py/id": 4 + }, + "full_name": "1_rigidbody/rigidbody_4.pdb", + "len": NaN, + "md5": null, + "ori_name": null, + "path": "/trinity/login/mgiulini/haddock3/examples/docking-antibody-antigen/run1-CDR-NMR-CSP-test-new/1_rigidbody/1_rigidbody", + "py/object": "haddock.libs.libontology.PDBFile", + "rel_path": { + "py/reduce": [ + { + "py/type": "pathlib.PosixPath" + }, + { + "py/tuple": [ + "..", + "1_rigidbody", + "rigidbody_4.pdb" + ] + } + ] + }, + "restr_fname": { + "py/id": 6 + }, + "score": 1.1810980000000004, + "topology": [ + { + "py/id": 8 + }, + { + "py/id": 11 + } + ], + "unw_energies": { + "air": 886.669, + "angles": 0.0, + "bonds": 0.0, + "bsa": 1241.54, + "cdih": 0.0, + "coup": 0.0, + "dani": 0.0, + "desolv": 12.0165, + "dihe": 0.0, + "elec": -7.6465, + "improper": 0.0, + "rdcs": 0.0, + "rg": 0.0, + "total": 915.003, + "vdw": 
"""Test haddock3-traceback client."""

import shutil
from pathlib import Path

import pandas as pd
import pytest

from haddock.clis.cli_traceback import main

from . import golden_data


@pytest.fixture
def rigid_json():
    """Provide example rigidbody io.json file."""
    return Path(golden_data, "io_rigid.json")


@pytest.fixture
def flexref_json():
    """Provide example flexref io.json file."""
    return Path(golden_data, "io_flexref.json")


def test_main(rigid_json, flexref_json, tmp_path):
    """Test haddock3-traceback client."""
    # build fake run_dir inside pytest's temporary folder: nothing is created
    # (or deleted) in the current working directory and the clean up does not
    # depend on the assertions passing
    run_dir = Path(tmp_path, "example_dir")
    step_dirs = [Path(run_dir, "1_rigidbody"),
                 Path(run_dir, "4_flexref")]
    for step_dir in step_dirs:
        step_dir.mkdir(parents=True)
    shutil.copy(rigid_json, Path(step_dirs[0], "io.json"))
    shutil.copy(flexref_json, Path(step_dirs[1], "io.json"))

    # run haddock3-traceback
    main(run_dir)

    # check traceback folder exists
    assert Path(run_dir, "traceback").is_dir()

    # check traceback files exist
    tr_file = Path(run_dir, "traceback", "traceback.tsv")
    assert tr_file.is_file()

    obs_tr = pd.read_csv(tr_file, sep="\t", dtype=str)
    exp_tr = [["00_topo1", "00_topo2", "1_rigidbody", "1_rigidbody_rank", "4_flexref", "4_flexref_rank"],  # noqa: E501
              ["4G6K_fv_haddock.psf", "4I1B-matched_haddock.psf", "rigidbody_2.pdb", "1", "flexref_1.pdb", "1"],  # noqa: E501
              ["4G6K_fv_haddock.psf", "4I1B-matched_haddock.psf", "rigidbody_4.pdb", "2", "flexref_2.pdb", "2"],  # noqa: E501
              ["4G6K_fv_haddock.psf", "4I1B-matched_haddock.psf", "rigidbody_1.pdb", "3", "-", "-"],  # noqa: E501
              ["4G6K_fv_haddock.psf", "4I1B-matched_haddock.psf", "rigidbody_3.pdb", "4", "-", "-"]]  # noqa: E501
    exp_tr_df = pd.DataFrame(exp_tr[1:], columns=exp_tr[0])

    assert obs_tr.columns.tolist() == exp_tr_df.columns.tolist()
    assert obs_tr.equals(exp_tr_df)