diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..c92e672f5 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,65 @@ +--- +name: ci + +on: + push: + branches: [main] + pull_request: + +jobs: + ci: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9", "3.10"] + fail-fast: false + + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y openmpi-bin libopenmpi3 libopenmpi-dev + + - name: install test dependencies + run: >- + pip install coverage pytest pytest-cov hypothesis pytest-mock mypy + fastapi==0.110.1 httpx==0.27.0 mpi4py==3.1.6 + + - name: install haddock3 + run: pip install -v . + + ## Disabled for now until we figure out a good configuration ## + # - name: check types + # run: mypy src/ + ############################################################### + + - name: run unit tests + run: >- + pytest tests/ + --cov --cov-report=term-missing --cov-append + --hypothesis-show-statistics + + - name: run integration tests + run: >- + pytest integration_tests/ + --cov --cov-report=term-missing --cov-append + --hypothesis-show-statistics + + - name: generate coverage report + run: | + coverage report + coverage xml + + - uses: codacy/codacy-coverage-reporter-action@v1 + if: >- + ${{ github.event_name != 'pull_request' || + github.event.pull_request.head.repo.full_name == github.repository }} + with: + project-token: ${{ secrets.CODACY_PROJECT_TOKEN }} + coverage-reports: ./coverage.xml diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index e77c25e1e..e6fc6d62d 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -4,28 +4,25 @@ on: push: branches: [main] pull_request: - branches: [main] jobs: build: - runs-on: ${{ matrix.platform }} - strategy: - matrix: - platform: [ubuntu-latest, macos-latest] - python-version: [3.9] + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + - uses: actions/setup-python@v2 with: - python-version: ${{ matrix.python-version }} + python-version: 3.9 - name: Install dependencies run: | python -m pip install pip==23.1.2 setuptools==67.7.2 wheel==0.40.0 pip install virtualenv==20.23.0 tox==4.5.1.1 + - name: install haddock3 + run: pip install -v . 
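+      # presumably needed so that the docs build below (tox -e docs) can import the installed haddock3 package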
+ - name: docs run: tox -e docs diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml deleted file mode 100644 index 37d74bb82..000000000 --- a/.github/workflows/tests.yml +++ /dev/null @@ -1,117 +0,0 @@ -name: tests - -on: - push: - branches: [main] - pull_request: - branches: [main] - -env: - CNS_EXEC: ${{ github.workspace }}/bin/cns - PYTHONPATH: ${{ github.workspace }}/src - -jobs: - unit: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [3.9] - - steps: - - uses: actions/checkout@v2 - with: - submodules: recursive - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Install system dependencies - run: | - sudo apt-get update - sudo apt-get install openmpi-bin libopenmpi3 libopenmpi-dev - - - name: install dependencies - run: | - python -m pip install pip==23.1.2 setuptools==67.7.2 wheel==0.40.0 - pip install virtualenv==20.23.0 tox==4.5.1.1 - - - name: install HADDOCK - run: | - pwd - ls -lsa - mkdir bin - touch bin/cns - cd src/fcc/src - chmod u+x Makefile - ./Makefile 2>%1 >/dev/null || true - cd - - cd src/fast-rmsdmatrix/src - chmod u+x Makefile - make fast-rmsdmatrix - cd - - - - - name: types - run: tox -e types - - - name: unit tests - run: tox -e test - - - uses: codacy/codacy-coverage-reporter-action@v1 - if: ${{ github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository }} - with: - project-token: ${{ secrets.CODACY_PROJECT_TOKEN }} - coverage-reports: ./coverage.xml - - integration: - needs: unit - runs-on: ubuntu-latest - if: ${{ github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository }} - strategy: - matrix: - python-version: [3.9] - - steps: - - uses: actions/checkout@v2 - with: - submodules: recursive - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Install system dependencies - run: | - sudo apt-get update - sudo apt-get install openmpi-bin libopenmpi3 libopenmpi-dev - - - name: install dependencies - run: | - python -m pip install pip==23.1.2 setuptools==67.7.2 wheel==0.40.0 - pip install virtualenv==20.23.0 tox==4.5.1.1 - - - name: install HADDOCK - run: | - pwd - ls -lsa - mkdir bin - touch bin/cns - cd src/fcc/src - chmod u+x Makefile - ./Makefile 2>%1 >/dev/null || true - cd - - cd src/fast-rmsdmatrix/src - chmod u+x Makefile - make fast-rmsdmatrix - cd - - - - name: install integration dependencies - run: | - curl ${{ secrets.CNS_LINK }} -o $CNS_EXEC -s - chmod +x $CNS_EXEC - - - name: run integration tests - run: tox -e integration diff --git a/.gitignore b/.gitignore index 7c34e505b..fb841385f 100644 --- a/.gitignore +++ b/.gitignore @@ -154,6 +154,5 @@ docs/haddock.modules.rst docs/haddock.modules.base_cns_module.rst docs/setup.rst docs/clients/*rst -src/fast-rmsdmatrix -src/fcc +src/haddock/bin/ log diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 7b813623c..000000000 --- a/.gitmodules +++ /dev/null @@ -1,7 +0,0 @@ -[submodule "src/fcc"] - path = src/fcc - url = https://github.com/haddocking/fcc.git - -[submodule "src/fast-rmsdmatrix"] - path = src/fast-rmsdmatrix - url = https://github.com/mgiulini/fast-rmsdmatrix.git diff --git a/MANIFEST.in b/MANIFEST.in index 370d15e4a..0dbcc6531 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -34,3 +34,5 @@ recursive-include varia *.f recursive-include varia *.inc 
recursive-include varia *.lua recursive-include varia *.md + +include src/haddock/bin/* diff --git a/setup.py b/setup.py index 08ce8a5c2..c1bbe77d5 100644 --- a/setup.py +++ b/setup.py @@ -1,107 +1,231 @@ #!/usr/bin/env python # -*- encoding: utf-8 -*- """Setup dot py.""" -import warnings +import os +import platform +import subprocess +import sys +import urllib.request from os.path import dirname, join +from pathlib import Path -from setuptools import SetuptoolsDeprecationWarning, find_packages, setup -# Import warnings object for later filtering out -from setuptools.command.easy_install import EasyInstallDeprecationWarning +from setuptools import Extension, find_packages, setup +from setuptools.command.build_ext import build_ext +from setuptools.command.install import install -# Add warnings filtering to the Setup Deprecation Warnings -warnings.filterwarnings("ignore", category=SetuptoolsDeprecationWarning) -warnings.filterwarnings("ignore", category=EasyInstallDeprecationWarning) +CNS_BINARIES = { + "x86_64-linux": "https://surfdrive.surf.nl/files/index.php/s/BWa5OimzbNliTi6/download", + "x86_64-darwin": "https://surfdrive.surf.nl/files/index.php/s/3Fzzte0Zx0L8GTY/download", + "arm64-darwin": "https://surfdrive.surf.nl/files/index.php/s/bYB3xPWf7iwo07X/download", + "aarch64-linux": "https://surfdrive.surf.nl/files/index.php/s/3rHpxcufHGrntHn/download", +} +cpp_extensions = [ + Extension( + "haddock.bin.contact_fcc", + sources=["src/haddock/deps/contact_fcc.cpp"], + extra_compile_args=["-O2"], + ), + Extension( + "haddock.bin.fast_rmsdmatrix", + sources=["src/haddock/deps/fast-rmsdmatrix.c"], + extra_compile_args=["-Wall", "-O3", "-march=native", "-std=c99"], + extra_link_args=["-lm"], + ), +] -def read(*names, **kwargs) -> str: + +class CustomBuild(build_ext): + """Custom build handles the C/C++ dependencies""" + + def run(self): + print("Building HADDOCK3 C/C++ binary dependencies...") + self.build_executable( + name="contact_fcc", + cmd=["g++", "-O2", "-o", "contact_fcc", "src/haddock/deps/contact_fcc.cpp"], + ) + self.build_executable( + name="fast-rmsdmatrix", + cmd=[ + "gcc", + "-Wall", + "-O3", + "-march=native", + "-std=c99", + "-o", + "fast-rmsdmatrix", + "src/haddock/deps/fast-rmsdmatrix.c", + "-lm", + ], + ) + + # Run the standard build_ext + build_ext.run(self) + + def build_executable(self, name, cmd): + """Helper function to execute the build command""" + try: + subprocess.check_call(cmd) + # Ensure the source bin directory exists + src_bin_dir = Path("src", "haddock", "bin") + src_bin_dir.mkdir(exist_ok=True, parents=True) + + # Move the built executable to the source bin directory + src_bin_exec = Path(src_bin_dir, name) + if src_bin_exec.exists(): + src_bin_exec.unlink() + + self.move_file(name, src_bin_exec) + + # If build_lib exists, also copy to there + if hasattr(self, "build_lib"): + build_bin_dir = Path(self.build_lib, "haddock", "bin") + build_bin_dir.mkdir(exist_ok=True, parents=True) + self.copy_file(Path(src_bin_dir, name), Path(build_bin_dir, name)) + + print(f"Successfully built and moved {name}") + except subprocess.CalledProcessError as e: + print(f"Error building {name}: {e}") + raise + + +class CustomInstall(install): + """Custom class to handle the download of the CNS binary""" + + def run(self): + install.run(self) + + if self.install_lib is None: + print("Something went wrong during installation") + sys.exit(1) + + arch = self.get_arch() + + if arch not in CNS_BINARIES: + print(f"Unknown architecture: {arch}") + sys.exit(1) + + cns_binary_url = 
CNS_BINARIES[arch]
+
+        src_bin_dir = Path("src", "haddock", "bin")
+        src_bin_dir.mkdir(exist_ok=True, parents=True)
+
+        cns_exec = Path(src_bin_dir, "cns")
+        status, msg = self.download_file(cns_binary_url, cns_exec)
+        if not status:
+            print(msg)
+            sys.exit(1)
+
+        # Make it executable
+        os.chmod(cns_exec, 0o755)
+
+        # check if this is being done via `pip install .`
+        if hasattr(self, "install_lib"):
+            install_bin_dir = Path(self.install_lib, "haddock", "bin")
+            install_bin_dir.mkdir(exist_ok=True, parents=True)
+            self.copy_file(cns_exec, Path(install_bin_dir, "cns"))
+
+    @staticmethod
+    def download_file(url, dest) -> tuple[bool, str]:
+        """Download a file from a URL"""
+        try:
+            urllib.request.urlretrieve(url, dest)
+            return True, "Download successful"
+        except Exception as e:  # pylint: disable=broad-except
+            return False, f"Download failed: {e}"
+
+    @staticmethod
+    def get_arch():
+        """Helper function to figure out the architecture"""
+        system = platform.system().lower()
+        machine = platform.machine().lower()
+
+        return f"{machine}-{system}"
+
+
+with open("requirements.txt", "r", encoding="utf-8") as f:
+    requirements = f.read().splitlines()
+
+
+def read_description(*names, **kwargs) -> str:
     """Read description files."""
     path = join(dirname(__file__), *names)
-    with open(path, encoding=kwargs.get('encoding', 'utf8')) as fh:
+    with open(path, encoding=kwargs.get("encoding", "utf8")) as fh:
         return fh.read()
 
-# activate once added, do not remove
-long_description = '{}\n{}'.format(
-    read('README.md'),
-    read('CHANGELOG.md'),
-    )
+readme = read_description("README.md")
+changelog = read_description("CHANGELOG.md")
+long_description = f"{readme}{os.linesep}{changelog}"
 
 setup(
-    name='haddock3',
-    version='3.0.0',
-    description='Haddock 3.',
+    name="haddock3",
+    version="3.0.0",
+    description="HADDOCK3",
     long_description=long_description,
-    long_description_content_type='text/markdown',
-    license='Apache License 2.0',
-    author='HADDOCK',
-    author_email='A.M.J.J.Bonvin@uu.nl',
-    url='https://github.com/haddocking/haddock3',
-    packages=find_packages('src'),
-    package_dir={'': 'src'},
-    # py_modules=[splitext(basename(i))[0] for i in glob("src/*.py")],
+    long_description_content_type="text/markdown",
+    license="Apache License 2.0",
+    author="BonvinLab",
+    author_email="bonvinlab.support@uu.nl",
+    url="https://github.com/haddocking/haddock3",
+    packages=find_packages("src"),
+    package_dir={"": "src"},
+    package_data={"haddock": ["bin/*"]},
     include_package_data=True,
     zip_safe=False,
     classifiers=[
-        # complete classifier list:
-        # http://pypi.python.org/pypi?%3Aaction=list_classifiers
-        'Development Status :: 4 - Beta',
-        'License :: OSI Approved :: Apache Software License',
-        'Natural Language :: English',
-        'Operating System :: POSIX',
-        'Operating System :: POSIX :: Linux',
-        'Operating System :: MacOS',
-        'Programming Language :: Python :: 3.9',
-        ],
+        # TODO: Update the classifiers - http://pypi.python.org/pypi?%3Aaction=list_classifiers
+        "Development Status :: 4 - Beta",
+        "License :: OSI Approved :: Apache Software License",
+        "Natural Language :: English",
+        "Operating System :: POSIX",
+        "Operating System :: POSIX :: Linux",
+        "Operating System :: MacOS",
+        "Intended Audience :: Science/Research",
+        "Topic :: Scientific/Engineering :: Bio-Informatics",
+        "Topic :: Scientific/Engineering :: Chemistry",
+        "Topic :: Scientific/Engineering :: Physics",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+    ],
     project_urls={
-        'webpage':
'https://github.com/haddocking/haddock3', - 'Documentation': 'https://github.com/haddocking/haddock3#readme', - 'Changelog': '', - 'Issue Tracker': 'https://github.com/haddocking/haddock3/issues', - 'Discussion Forum': 'https://github.com/haddocking/haddock3/issues', - }, + "webpage": "https://bonvinlab.org/haddock3", + "Documentation": "https://github.com/haddocking/haddock3#readme", + "Changelog": "", + "Issue Tracker": "https://github.com/haddocking/haddock3/issues", + "Discussion Forum": "https://github.com/haddocking/haddock3/issues", + }, keywords=[ - 'Structural Biology', - 'Biochemistry', - 'Docking', - 'Protein docking', - 'Proteins', - ], - python_requires='>=3.9, <3.10', - install_requires=[ - # not added on purpose - ], - extras_require={ - }, - setup_requires=[ - ], + "Structural Biology", + "Biochemistry", + "Docking", + "Protein docking", + "Proteins", + ], + python_requires=">=3.9, <3.11", + install_requires=[requirements], + extras_require={}, + setup_requires=[], entry_points={ - 'console_scripts': [ - 'haddock3 = haddock.clis.cli:maincli', + "console_scripts": [ + "haddock3 = haddock.clis.cli:maincli", "haddock3-mpitask = haddock.clis.cli_mpi:maincli", - 'haddock3-bm = haddock.clis.cli_bm:maincli', - 'haddock3-cfg = haddock.clis.cli_cfg:maincli', - 'haddock3-clean = haddock.clis.cli_clean:maincli', - 'haddock3-copy = haddock.clis.cli_cp:maincli', - 'haddock3-dmn = haddock.clis.cli_dmn:maincli', - 'haddock3-pp = haddock.clis.cli_pp:maincli', - 'haddock3-score = haddock.clis.cli_score:maincli', - 'haddock3-unpack = haddock.clis.cli_unpack:maincli', - 'haddock3-analyse = haddock.clis.cli_analyse:maincli', - 'haddock3-traceback = haddock.clis.cli_traceback:maincli', - 'haddock3-re = haddock.clis.cli_re:maincli', - 'haddock3-restraints = haddock.clis.cli_restraints:maincli' - ] - }, - # cmdclass={'build_ext': optional_build_ext}, - # ext_modules=[ - # Extension( - # splitext(relpath(path, 'src').replace(os.sep, '.'))[0], - # sources=[path], - # include_dirs=[dirname(path)] - # ) - # for root, _, _ in os.walk('src') - # for path in glob(join(root, '*.c')) - # ], - ) + "haddock3-bm = haddock.clis.cli_bm:maincli", + "haddock3-cfg = haddock.clis.cli_cfg:maincli", + "haddock3-clean = haddock.clis.cli_clean:maincli", + "haddock3-copy = haddock.clis.cli_cp:maincli", + "haddock3-dmn = haddock.clis.cli_dmn:maincli", + "haddock3-pp = haddock.clis.cli_pp:maincli", + "haddock3-score = haddock.clis.cli_score:maincli", + "haddock3-unpack = haddock.clis.cli_unpack:maincli", + "haddock3-analyse = haddock.clis.cli_analyse:maincli", + "haddock3-traceback = haddock.clis.cli_traceback:maincli", + "haddock3-re = haddock.clis.cli_re:maincli", + "haddock3-restraints = haddock.clis.cli_restraints:maincli", + ] + }, + cmdclass={"build_ext": CustomBuild, "install": CustomInstall}, + ext_modules=cpp_extensions, +) diff --git a/src/fast-rmsdmatrix b/src/fast-rmsdmatrix deleted file mode 160000 index 4580d1c89..000000000 --- a/src/fast-rmsdmatrix +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4580d1c89a0ec162ca9b32c67d2806afcac52f13 diff --git a/src/fcc b/src/fcc deleted file mode 160000 index 3a1626de3..000000000 --- a/src/fcc +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 3a1626de3366f6db8b45caed3bd9fbc6b2881286 diff --git a/src/haddock/clis/re/clustfcc.py b/src/haddock/clis/re/clustfcc.py index 36fb5186a..7d7a2d685 100644 --- a/src/haddock/clis/re/clustfcc.py +++ b/src/haddock/clis/re/clustfcc.py @@ -6,6 +6,7 @@ from haddock import log from haddock.core.defaults import INTERACTIVE_RE_SUFFIX from 
haddock.core.typing import Union
+from haddock.fcc import cluster_fcc
 from haddock.gear.config import load as read_config
 from haddock.gear.config import save as save_config
 from haddock.libs.libclust import (
diff --git a/src/haddock/core/defaults.py b/src/haddock/core/defaults.py
index 003889a54..a0e965f08 100644
--- a/src/haddock/core/defaults.py
+++ b/src/haddock/core/defaults.py
@@ -1,22 +1,34 @@
 """All default parameters used by the framework."""
+
+import importlib.resources
+import os
 import string
 import sys
 from pathlib import Path
 
 import yaml
+from pkg_resources import resource_filename
+
+import haddock
+from haddock import core_path, log
-from haddock import core_path, haddock3_repository_path, log
+BINARY_DIR = Path(importlib.resources.files(haddock) / "bin")  # type: ignore
-# Locate the CNS binary
-cns_exec = Path(haddock3_repository_path, "bin", "cns")
+cns_exec = Path(resource_filename("haddock", "bin/cns"))
 if not cns_exec.exists():
-    log.error(
-        'CNS executable `bin/cns` not found. '
-        'Please check the install instructions.'
+    log.warning("CNS executable not found at %s", cns_exec)
+    _cns_exec = os.environ.get("CNS_EXEC")
+    if _cns_exec is None:
+        log.error(
+            "Please define the location of the CNS binary by setting the CNS_EXEC environment variable"
         )
-    sys.exit()
+        sys.exit(1)
+    else:
+        cns_exec = Path(_cns_exec)
+CONTACT_FCC_EXEC = Path(resource_filename("haddock", "bin/contact_fcc"))
+FAST_RMSDMATRIX_EXEC = Path(resource_filename("haddock", "bin/fast-rmsdmatrix"))
 MODULE_PATH_NAME = "step_"
 """
@@ -40,16 +52,11 @@
 MODULE_DEFAULT_YAML = "defaults.yaml"
 """Default name of the yaml default parameters file."""
 
-CNS_MODULES = ["rigidbody",
-               "flexref",
-               "emscoring",
-               "mdscoring",
-               "mdref",
-               "emref"]
+CNS_MODULES = ["rigidbody", "flexref", "emscoring", "mdscoring", "mdref", "emref"]
 """List of CNS modules available in HADDOCK3."""
 
-with open(Path(core_path, "mandatory.yaml"), 'r') as fin:
+with open(Path(core_path, "mandatory.yaml"), "r") as fin:
     _ycfg = yaml.safe_load(fin)
 max_molecules_allowed = _ycfg["molecules"]["maxitems"]
 del _ycfg
diff --git a/src/haddock/deps/contact_fcc.cpp b/src/haddock/deps/contact_fcc.cpp
new file mode 100755
index 000000000..50382d213
--- /dev/null
+++ b/src/haddock/deps/contact_fcc.cpp
@@ -0,0 +1,124 @@
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <cctype>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+#include <set>
+#include <algorithm>
+
+using namespace std;
+
+struct Coor3f {
+    float x;
+    float y;
+    float z;
+};
+
+struct Residue {
+    int nr;
+    vector<Coor3f> coor;
+    vector<string> atom;
+    int seg;
+};
+
+vector<Residue> res;
+
+bool seg_sorter (Residue res_a, Residue res_b) {
+    int segA = res_a.seg;
+    int segB = res_b.seg;
+    return (segA < segB);
+};
+
+int main(int argc, char *argv[]) {
+    char buf[2000];
+
+    if (argc < 3) {
+        fprintf(stderr,"ERROR: Too few arguments\n");
+        fprintf(stderr, "Usage: contact <pdb file> <distance cutoff>\n");
+        return 1;
+    }
+
+    char *filename = argv[1];
+    float cutoff = atof(argv[2]);
+
+    if (cutoff < 0 || cutoff > 100) {
+        fprintf(stderr,"ERROR: Cutoff out of range\n");
+        fprintf(stderr, "Usage: contact <pdb file> <distance cutoff>\n");
+        return 1;
+    }
+
+    FILE *fil = fopen(filename, "r");
+    if (fil == NULL) {
+        fprintf(stderr, "ERROR: PDB file %s does not exist\n", filename);
+        return 1;
+    }
+
+    int currnr = -99999;
+    char currseg;
+    int segid = 0;
+
+    set<string> nonconv;
+    while (!feof(fil)) {
+        char code[10];
+        char atom[5];
+        if (!fgets(buf, 2000, fil)) break;
+        sscanf(buf, "%s %*d %s", code, atom);
+
+        // Ignore HETATM and hydrogens
+        if (!strncmp(code,"ATOM", 4) && ( atom[0] != 'H' && !( isdigit(atom[0]) && atom[1] == 'H' ) ) )
{ + int nr = atoi(buf + 22); + char seg = buf[72]; + if (seg != currseg) { + currseg = seg; + segid++; + } + if (nr != currnr) { + Residue r; + r.nr = nr+10000; + r.seg = segid; + res.push_back(r); + currnr = r.nr; + } + Residue &rcurr = res[res.size() -1]; + Coor3f ccoor; + ccoor.x = atof(buf+27); + ccoor.y = atof(buf+38); + ccoor.z = atof(buf+46); + rcurr.coor.push_back(ccoor); + string atom2(atom); + rcurr.atom.push_back(atom2); + } + } + + if (!res.size()) {fprintf(stderr, "ERROR: PDB file %s contains no residues\n", filename); return 1;} + + // Sort the residues by segment to avoid random chain ordering problems + sort (res.begin(), res.end(), seg_sorter); + + double cutoffsq = cutoff * cutoff; + + for (int n = 0; n < res.size(); n++) { + vector &c1 = res[n].coor; + int seg1 = res[n].seg; + for (int nn = n + 1; nn < res.size(); nn++) { + int seg2 = res[nn].seg; + if (seg1 == seg2) continue; + vector &c2 = res[nn].coor; + for (int i = 0; i < res[n].coor.size(); i++) { + for (int ii = 0; ii < res[nn].coor.size(); ii++) { + double currdissq = + (c1[i].x - c2[ii].x) * (c1[i].x - c2[ii].x) + + (c1[i].y - c2[ii].y) * (c1[i].y - c2[ii].y) + + (c1[i].z - c2[ii].z) * (c1[i].z - c2[ii].z); + if (currdissq < cutoffsq) { + printf ("%d%d%d%d\n", res[n].nr, res[n].seg, res[nn].nr, res[nn].seg); + } + } + } + } + } + fclose(fil); +} diff --git a/src/haddock/deps/fast-rmsdmatrix.c b/src/haddock/deps/fast-rmsdmatrix.c new file mode 100644 index 000000000..d352bbabc --- /dev/null +++ b/src/haddock/deps/fast-rmsdmatrix.c @@ -0,0 +1,883 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct traj{ + int frames; /*!< number of frames in the trajectory */ + double **traj_coords; /*!< 2D array of xyz coordinates */ + int n_at; /*!< number of atoms in the atomistic structure */ + int pairs; /*!< number of possible pairs of structures */ +}traj; + +typedef struct alignments{ + double *rmsd_mat; /*!< condensed pairwise RMSD matrix */ + double **coms; /*!< array of centers of mass */ + int *ref_structs; /*!< array of reference structures */ + int *mod_structs; /*!< array of model structures */ +}alignments; + +void failed(char message[]) { + printf("\n FAILED *** %s ***\n \n", message); + exit(1); +} + +struct arguments; + +void zero_vec_d(double *a, int dim) { + + int i; + for (i = 0; i < dim; i++) { + a[i] = 0; + } +} + +int *i1t(int n1) { + int *p, *a; + int i; + if ((p = (int *) malloc((size_t) n1 * sizeof(int))) == NULL) + failed("i1t: failed"); + for (i = 0, a = p; i < n1; i++) + *a++ = 0; + return p; +} + +double *d1t(int n1) { + double *p, *a; + int i; + if ((p = (double *) malloc((size_t) n1 * sizeof(double))) == NULL) + failed("d1t: failed n1 "); + for (i = 0, a = p; i < n1; i++) + *a++ = 0; + return p; +} + +double **d2t(int n1, int n2) { + double **p, *a; + int i; + if ((p = (double **) malloc((size_t) n1 * sizeof(double *))) == NULL) + failed("d2t: failed n1"); + if ((p[0] = (double *) malloc((size_t) n1 * n2 * sizeof(double))) == NULL) + failed("d2t: failed n2"); + for (i = 0; i < n1 - 1; i++) + p[i + 1] = p[i] + n2; + for (i = 0, a = p[0]; i < n1 * n2; i++) + *a++ = 0; + return p; +} + +void free_d2t(double **p) { + + free(p[0]); + free(p); +} + +void free_d1t(double *p) { + + free(p); +} + +double scal_d(double *a, double *b, int dim) { + + int i; + double temp; + + temp = 0.0; + for (i = 0; i < dim; i++) { + temp += a[i] * b[i]; + } + return (temp); +} + +double norm_d(double *a, int dim) { + + return (sqrt(scal_d(a, a, 
dim))); +} + +void normalize_d(double *a, int dim) { + int i; + double temp; + + temp = norm_d(a, dim); + for (i = 0; i < dim; i++) { + a[i] = a[i] / temp; + } +} + +void vecprod_d(double *a, double *b, double *c) { + + c[0] = a[1] * b[2] - a[2] * b[1]; + c[1] = a[2] * b[0] - a[0] * b[2]; + c[2] = a[0] * b[1] - a[1] * b[0]; +} + +void myjacobi(double a[][3], int n, double *d, double v[][3], int *nrot) { + + int j, iq, ip, i; + double tresh, theta, tau, t, sm, s, h, g, c; + double b[3], z[3]; + +#define ROTATE(a, i, j, k, l) g=a[i][j];h=a[k][l];a[i][j]=g-s*(h+g*tau);a[k][l]=h+s*(g-h*tau); + + for (ip = 0; ip <= (n - 1); ip++) { + for (iq = 0; iq <= (n - 1); iq++) + v[ip][iq] = 0.0; + v[ip][ip] = 1.0; + } + for (ip = 0; ip <= (n - 1); ip++) { + b[ip] = d[ip] = a[ip][ip]; + z[ip] = 0.0; + } + *nrot = 0; + for (i = 1; i <= 500; i++) { + sm = 0.0; + for (ip = 0; ip <= n - 2; ip++) { + for (iq = ip + 1; iq <= (n - 1); iq++) + sm += fabs(a[ip][iq]); + } + if (sm == 0.0) { + return; + } + if (i < 4) + tresh = 0.2 * sm / (n * n); + else + tresh = 0.0; + for (ip = 0; ip <= n - 2; ip++) { + for (iq = ip + 1; iq <= (n - 1); iq++) { + g = 100.0 * fabs(a[ip][iq]); + if (i > 4 && (fabs((fabs(d[ip]) + g) - fabs(d[ip])) < 1.0e-6) + && (fabs((fabs(d[iq]) + g) - fabs(d[iq])) < 1.0e-6)) + a[ip][iq] = 0.0; + else if (fabs(a[ip][iq]) > tresh) { + h = d[iq] - d[ip]; + if (fabs((fabs(h) + g) - fabs(h)) < 1.0e-6) + t = (a[ip][iq]) / h; + else { + theta = 0.5 * h / (a[ip][iq]); + t = 1.0 / (fabs(theta) + sqrt(1.0 + theta * theta)); + if (theta < 0.0) + t = -t; + } + c = 1.0 / sqrt(1 + t * t); + s = t * c; + tau = s / (1.0 + c); + h = t * a[ip][iq]; + z[ip] -= h; + z[iq] += h; + d[ip] -= h; + d[iq] += h; + a[ip][iq] = 0.0; + for (j = 0; j <= ip - 1; j++) { + ROTATE (a, j, ip, j, iq) + } + for (j = ip + 1; j <= iq - 1; j++) { + ROTATE (a, ip, j, j, iq) + } + for (j = iq + 1; j <= (n - 1); j++) { + ROTATE (a, ip, j, iq, j) + } + for (j = 0; j <= (n - 1); j++) { + ROTATE (v, j, ip, j, iq) + } + ++(*nrot); + } + } + } + for (ip = 0; ip <= (n - 1); ip++) { + b[ip] += z[ip]; + d[ip] = b[ip]; + z[ip] = 0.0; + } + } + printf("Too many iterations in routine JACOBI %lf\n", sm); + /* exit (1); */ +#undef ROTATE +} + +int columns(char* string){ + /** + * routine that returns the number of columns for each row inside the file chosen. + * + * Parameter + * ---------- + * + * `string` : string token in account + */ + + int counter = 0; + + while(string){ + counter++; + string = strtok(NULL, " \t\v\r\n"); + } + return counter; +} + +int n_rows (FILE *f){ + /** + * routine that returns the number of rows in a file. It counts correctly this number even if the last row does not present \n + * + * Parameter + * ---------- + * + * `f` : FILE structure that represents the file opened. 
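+ *
+ * Note: the stream is rewound before counting and is left at end-of-file,
+ * so the caller rewinds it again before parsing.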
+ */ + + size_t rows_i = 0, n = 0; + char *buf = NULL; + + fseek(f, 0, SEEK_SET); + + while(getline(&buf, &n, f) != -1) rows_i++; + + free(buf); + + return rows_i; +} + +int get_pair(int nmodels, int idx, int ij_arr[2]){ + if (nmodels < 0 || idx < 0){ + printf("get_pair cannot accept negative numbers"); + printf("Input is %d , %d", nmodels, idx); + exit(EXIT_FAILURE); + } + // solve the second degree equation + int b = 1 - (2 * nmodels); + int i = (-b - sqrt(b * b - 8 * idx)) / 2; + int j = idx + i * (b + i + 2) / 2 + 1; + ij_arr[0] = i; + ij_arr[1] = j; + return 0; +} + +double optimal_alignment(double **x, double **y, int cgnum, double u[][3]) { + /** + * routine that computes the Kabsch alignment and the rmsd between two configurations + * + * + * Parameters + * ---------- + * + * `x`, `y` : CG structures + * + * `cgnum` : length of CG mapping + * + * `u` : rotation matrix + */ + void myjacobi(double a[][3], int n, double *d, double v[][3], int *nrot); + int i, j, k, sign[3], order[3], nrot; + double e, e0; + double r[3][3], rt[3][3], temp; //, **x, **y; + double a[3][3], eval[3], evec[3][3]; + double eigenvalues[3], eigenvectors[3][3], b[3][3]; + int speak = 1; + + e0 = 0.0; + for (i = 0; i < cgnum; i++) { + e0 += 0.5 * norm_d(x[i], 3) * norm_d(x[i], 3); + e0 += 0.5 * norm_d(y[i], 3) * norm_d(y[i], 3); + } + + for (i = 0; i < 3; i++) { + for (j = 0; j < 3; j++) { + r[i][j] = 0.0; + for (k = 0; k < cgnum; k++) { + r[i][j] += y[k][i] * x[k][j]; + } + rt[j][i] = r[i][j]; + } + } + + if (isnan(e0) == 1) printf("Found a NaN in Kabsch alignment at checkpoint 1\n"); + + for (i = 0; i < 3; i++) { + for (j = 0; j < 3; j++) { + a[i][j] = 0; + for (k = 0; k < 3; k++) { + a[i][j] += rt[i][k] * r[k][j]; + } + } + } + + myjacobi(a, 3, eval, evec, &nrot); + /* we add small quantities in order to remove potentially dangerous degeneracies */ + if (isnan(eval[0]) == 1) printf("Found a NaN in Kabsch alignment at checkpoint 2\n"); + if (isnan(eval[1]) == 1) printf("Found a NaN in Kabsch alignment at checkpoint 3\n"); + if (isnan(eval[2]) == 1) printf("Found a NaN in Kabsch alignment at checkpoint 4\n"); + + eval[0] += 0.0000000000000001; + eval[1] += 0.00000000000000001; + eval[2] += 0.00000000000000002; + + if ((eval[0] < eval[1]) && (eval[0] < eval[2])) { + order[0] = 1; + order[1] = 2; + order[2] = 0; + } + + if ((eval[1] < eval[0]) && (eval[1] < eval[2])) { + order[0] = 0; + order[1] = 2; + order[2] = 1; + } + + if ((eval[2] < eval[0]) && (eval[2] < eval[1])) { + order[0] = 0; + order[1] = 1; + order[2] = 2; + } + + for (i = 0; i < 3; i++) { + eigenvalues[i] = eval[order[i]]; + for (j = 0; j < 3; j++) { + eigenvectors[i][j] = evec[j][order[i]]; + } + } + + normalize_d(eigenvectors[0], 3); + normalize_d(eigenvectors[1], 3); + vecprod_d(eigenvectors[0], eigenvectors[1], eigenvectors[2]); + normalize_d(eigenvectors[2], 3); + + for (i = 0; i < 2; i++) { + for (j = 0; j < 3; j++) { + b[i][j] = 0; + for (k = 0; k < 3; k++) { + b[i][j] += r[j][k] * eigenvectors[i][k]; + } + } + normalize_d(b[i], 3); + } + + vecprod_d(b[0], b[1], b[2]); + normalize_d(b[2], 3); + + temp = 0.0; + for (i = 0; i < 3; i++) { + for (j = 0; j < 3; j++) { + temp += b[2][i] * r[i][j] * eigenvectors[2][j]; + } + } + sign[2] = +1; + if (temp < 0) + sign[2] = -1; + sign[0] = sign[1] = 1; + + if (fabs(eigenvalues[2]) < 0.0000001) { + e = e0 - sqrt(eigenvalues[0]) - sqrt(eigenvalues[1]); + } else { + e = e0 - sqrt(eigenvalues[0]) - sqrt(eigenvalues[1]) - sign[2] * sqrt(eigenvalues[2]); + } + + if (isnan(e) == 1) { + printf("Found a NaN in 
Kabsch alignment at checkpoint 5 | \n"); + printf("e %lf e0 %lf e1 %lf e2 %lf e3 %lf\n", e, e0, eigenvalues[0], eigenvalues[1], eigenvalues[2]); + } +/********************/ + e = 2.0 * e / cgnum; + if (e < 0.0) { + if (fabs(e) < 1.0e-3) { + if (speak == 1) { + //printf("Warning. In Kabsch alignment found slightly negative value of e (%e). Identical structures? Setting it to 0.000001.\n", + // e); + e = 0.000001; + } + } + /* occasionally, when dealing with two practically identical configurations + the value of e may be slightly negative due to the small offsets and roundoff errors. + In this case we set it equal to zero. */ + else { + FILE *fe; + fe = fopen("error.dat", "w"); + fprintf(fe, "Error. In Kabsch alignment found negative value of e: (%lf)\n", e); + printf("Error. In Kabsch alignment found negative value of e: (%lf)\n", e); + fclose(fe); + exit(EXIT_FAILURE); + } + } + e = sqrt(e); + if (isnan(e) == 1) printf("Found a NaN in Kabsch alignment at checkpoint 6\n"); +/********************/ + // filling rotation_matrix + for (i = 0; i < 3; i++) { + for (j = 0; j < 3; j++) { + u[i][j] = 0.0; + for (k = 0; k < 3; k++) { + u[i][j] += b[k][i] * eigenvectors[k][j]; + } + } + } + return (e); +} + +void cycle_ilrmsd(traj *Trajectory, traj *Ligand_Trajectory, alignments *align, int start_pair[2]) { + /** + * routine that cycles over pairs of frames in a trajectory, aligning on the receptor + * and computing the interface ligand rmsd + * + * Parameters + * ---------- + * + * `Trajectory` : traj object + * + * `align` : alignments object + * + * `start_pair` : pair of structures to begin with + */ + int n; + int i, j; + double **x, **y, **xlig, **ylig, **ylig_rot; + double u[3][3], cm1[3], cm2[3]; + int rot_idx; + // allocating vectors + x = d2t(Trajectory->n_at, 3); + xlig = d2t(Ligand_Trajectory->n_at, 3); + y = d2t(Trajectory->n_at, 3); + ylig = d2t(Ligand_Trajectory->n_at, 3); + ylig_rot = d2t(Ligand_Trajectory->n_at, 3); + int ref = start_pair[0]; + int mod = start_pair[1]; + int old_ref = ref - 1; + + for (n = 0; n < Trajectory->pairs; n++) { + if (ref != old_ref) { + // recalculate com and translate structure + zero_vec_d(cm1, 3); + for (i = 0; i < Trajectory->n_at; i++) { + for (j = 0; j < 3; j++) { + cm1[j] += Trajectory->traj_coords[ref][i * 3 + j] / Trajectory->n_at; + } + } + // filling structure + for (i = 0; i < Trajectory->n_at; i++) { + for (j = 0; j < 3; j++) { + x[i][j] = Trajectory->traj_coords[ref][i * 3 + j] - cm1[j]; + } + } + // now the ligand + for (i = 0; i < Ligand_Trajectory->n_at; i++) { + for (j = 0; j < 3; j++) { + xlig[i][j] = Ligand_Trajectory->traj_coords[ref][i * 3 + j] - cm1[j]; + } + } + old_ref += 1; + } + + // second structure + zero_vec_d(cm2, 3); + for (i = 0; i < Trajectory->n_at; i++) { + for (j = 0; j < 3; j++) { + cm2[j] += Trajectory->traj_coords[mod][i * 3 + j] / Trajectory->n_at; + } + } + for (i = 0; i < Trajectory->n_at; i++) { + for (j = 0; j < 3; j++) { + y[i][j] = Trajectory->traj_coords[mod][i * 3 + j] - cm2[j]; + } + } + // now ylig + for (i = 0; i < Ligand_Trajectory->n_at; i++) { + for (j = 0; j < 3; j++) { + ylig[i][j] = Ligand_Trajectory->traj_coords[mod][i * 3 + j] - cm2[j]; + } + } + + // Alignment AND RMSD computation + align->rmsd_mat[n] = optimal_alignment(x, y, Trajectory->n_at, u); + align->ref_structs[n] = ref + 1; + align->mod_structs[n] = mod + 1; + + // now correcting the coordinates of the ligand + for (i = 0; i < Ligand_Trajectory->n_at; i++) { + for (j = 0; j < 3; j++) { + ylig_rot[i][j] = 0.0; + for (rot_idx = 0; rot_idx 
< 3; rot_idx++) { + ylig_rot[i][j] += u[rot_idx][j] * ylig[i][rot_idx]; + } + } + } + // calculate rmsd between ligands + double ilrmsd = 0.0; + double delta; + for (i = 0; i < Ligand_Trajectory->n_at; i++) { + for (j = 0; j < 3; j++) { + delta = ylig_rot[i][j] - xlig[i][j]; + ilrmsd += delta * delta; + } + } + + ilrmsd = sqrt(ilrmsd / Ligand_Trajectory->n_at); + // the value before was the receptor interface rmsd, now let's change it to the ligand interface rmsd + align->rmsd_mat[n] = ilrmsd; + + // update idx + if (mod == Trajectory->frames - 1) { + ref += 1; + mod = ref + 1; + } + else { + mod += 1; + } + + } + free_d2t(x); + free_d2t(y); + free_d2t(xlig); + free_d2t(ylig); + free_d2t(ylig_rot); +} + +void cycle_rmsd(traj *Trajectory, alignments *align, int start_pair[2]) { + /** + * routine that cycles over all pairs of frames in a trajectory, calling `optimal_alignment` + * + * Parameters + * ---------- + * + * `Trajectory` : traj object + * + * `align` : alignments object + * + * `start_pair` : pair of structures to begin with + */ + int n; + int i, j; + double **x, **y; + double u[3][3], cm1[3], cm2[3]; + printf("calculating rotation matrices..\n"); + // allocating vectors + x = d2t(Trajectory->n_at, 3); + y = d2t(Trajectory->n_at, 3); + int ref = start_pair[0]; + int mod = start_pair[1]; + int old_ref = ref - 1; + //for (n = 0; n < Trajectory->frames; n++) { + for (n = 0; n < Trajectory->pairs; n++) { + if (ref != old_ref) { + // recalculate com and translate structure + + zero_vec_d(cm1, 3); + for (i = 0; i < Trajectory->n_at; i++) { + for (j = 0; j < 3; j++) { + cm1[j] += Trajectory->traj_coords[ref][i * 3 + j] / Trajectory->n_at; + } + } + // filling structure + for (i = 0; i < Trajectory->n_at; i++) { + for (j = 0; j < 3; j++) { + x[i][j] = Trajectory->traj_coords[ref][i * 3 + j] - cm1[j]; + } + } + old_ref += 1; + } + // second structure + zero_vec_d(cm2, 3); + for (i = 0; i < Trajectory->n_at; i++) { + for (j = 0; j < 3; j++) { + cm2[j] += Trajectory->traj_coords[mod][i * 3 + j] / Trajectory->n_at; + } + } + for (i = 0; i < Trajectory->n_at; i++) { + for (j = 0; j < 3; j++) { + y[i][j] = Trajectory->traj_coords[mod][i * 3 + j] - cm2[j]; + } + } + // Alignment AND RMSD computation + align->rmsd_mat[n] = optimal_alignment(x, y, Trajectory->n_at, u); + align->ref_structs[n] = ref + 1; + align->mod_structs[n] = mod + 1; + + // update idx + if (mod == Trajectory->frames - 1) { + ref += 1; + mod = ref + 1; + } + else { + mod += 1; + } + + } + free_d2t(x); + free_d2t(y); +} + +void read_TrajectoryFile(char *TrajFileName, traj *Trajectory){ + /** + * routine that reads the input xyz coordinate file + * + * Parameters + * ---------- + * + * `TrajFileName` : trajectory filename + * + * `Trajectory` : traj object + */ + + FILE *ft; // trajectory file; + FILE *fe; // declaring error file; + + int rows_i, cols_i, i, p, frame_idx, j, count; + size_t line_buf_size = 0; + + char *token; + char *token_arr[100]; //*string, *line; + char *string = NULL; + char *line; + + /* Opening and check if it exist and if it is empty */ + ft = fopen(TrajFileName, "r"); + printf("Reading Trajectory FILE %s\n", TrajFileName); + + //check_empty_file(ft, TrajFileName); + + rows_i = n_rows(ft); // Number of rows in trajectory file "ft". 
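+    /* Expected xyz layout: every frame consists of (n_at + 2) rows -- one row with
+       the number of atoms, one title row, then n_at rows of "name x y z"; the
+       consistency checks below rely on this layout. */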
+ fseek(ft, 0, SEEK_SET); + + line = (char *) malloc (200); // Allocate char line with size (lenght) = 200; + // We are sure that the lenght of each line is less than 200 + + /* Initialize the 2D-array Trajectory->traj_coords[][] */ + for(i = 0; i < Trajectory->frames; i++){ + for(j = 0; j < Trajectory->n_at; j++){ + Trajectory->traj_coords[i][3 * j + 0] = 0.0; + Trajectory->traj_coords[i][3 * j + 1] = 0.0; + Trajectory->traj_coords[i][3 * j + 2] = 0.0; + } + } + + /* Checking for empty rows; + * Checking that there are ONLY rows with one column and three columns + * Checking that the rows with one column correspond to an integer number i.e. the number of atoms; + * Checking that the rows with three columns are float corresponding to x,y,z trajectory coordinate. */ + + frame_idx = 0; // Initialize frame index to 0 + p = 0; // Initialize counter "p" to 0 + + for(i=0; i=3*Trajectory->n_at) // p increases from 0 to 3*atomnum i.e. p=[0;3*230) i.e. p = [0;689] + p = 0; + + getline(&string, &line_buf_size, ft); // Reading entire line; + + strcpy(line, string); // Copying "string" in "line"; we need it in case of three columns. + + //if( i != (Trajectory->n_at + 2)*(frame_idx-1) + 1) // Checking for empty rows except for the 2nd row of each frame representing the title + // check_empty_rows(string); // that could also be an empty string + + token = strtok(string, " \t\v\r\n"); // Splitting the string-line in columns separated by spaces, tabs, or \n or \v or \r + + cols_i = columns(token); // Counting the number of columns in each row of file. + + + if(i!=(Trajectory->n_at + 2)*(frame_idx-1) + 1){ // exclude the 2nd row of each frame + + if(cols_i == 1){ + + if(i != (Trajectory->n_at + 2)*frame_idx) { + printf("Error. The %dth frame contains a different number of rows. Each frame must have %d rows\n", frame_idx+1, Trajectory->n_at + 2); + exit(EXIT_FAILURE); + } + else{ + frame_idx++; + if(frame_idx>Trajectory->frames){ + fe = fopen("error.dat", "w"); + fprintf(fe, "Error. The number of trajectory frames is higher than the declared one in parameter file (%d).\nAborting\n", Trajectory->frames); + fclose(fe); + printf("Error. The number of trajectory frames is higher than the declared one in parameter file (%d).\nAborting\n", Trajectory->frames); + exit(EXIT_FAILURE); + } + //check_int_string(string, i, TrajFileName); // Checking that each row is an INTEGER number. + if(atoi(string) != Trajectory->n_at){ + fe = fopen("error.dat","w"); + fprintf(fe,"Error. The number of atoms at %dth row has length (%d) different from atomnum(%d). Aborting\n",\ + i+1,atoi(string),Trajectory->n_at); + printf("Error. The number of atoms at %dth row has length (%d) different from atomnum(%d). Aborting\n",\ + i+1,atoi(string),Trajectory->n_at); + fclose(fe); + exit(EXIT_FAILURE); + } + } + } + + if(cols_i == 2){ + + if(i != (Trajectory->n_at + 2)*frame_idx + 1){ + fe = fopen("error.dat", "w"); + fprintf(fe,"Error. Each row must not contain 2 columns (except the title in the 2nd row of each frame that can contain N columns).\n\ + ONLY 1 or 4 columns are allowed in Trajectory (%dth row has 2 columns)\n", i+1); + + printf("Error. Each row must not contain 2 columns (except the title in the 2nd row of each frame that can contain N columns)\n\ + ONLY 1 or 4 columns are allowed in Trajectory (%dth row has 2 columns.\n", i+1); + fclose(fe); + exit(EXIT_FAILURE); + } + } + + if(cols_i == 3){ + + if(i != (Trajectory->n_at + 2)*frame_idx + 1){ + fe = fopen("error.dat", "w"); + fprintf(fe,"Error. 
Each row must not contain 3 columns (except the title in the 2nd row of each frame that can contain N columns). \ + ONLY 1 or 4 columns are allowed in Trajectory (%dth row has 3 columns)\n", i+1); + + printf("Error. Each row must not contain 3 columns (except the title in the 2nd row of each frame that can contain N columns) \ + ONLY 1 or 4 columns are allowed in Trajectory (%dth row has 3 columns.\n", i+1); + fclose(fe); + exit(EXIT_FAILURE); + } + } + + if(cols_i == 4){ + if(i != (Trajectory->n_at + 2)*frame_idx){ + token_arr[0] = strtok(line, " \t\v\r\n"); + + count = 0; + while(token_arr[count]){ + count++; + token_arr[count] = strtok(NULL, " \t\v\r\n"); + } + + for (j=1; j <= count-1 ; j++){ + //check_float_string(token_arr[j], i, TrajFileName); // Checking that each row is an FLOAT number. + Trajectory->traj_coords[frame_idx-1][p] = atof(token_arr[j]); // Assigning each float-string to traj_coords[i][j] 2D-array + p++; + } + + } + + else{ + printf("Error. Each row of the frame (except the 1st row containing the number of atoms and the 2nd row containing the title),\n\ + must contain 4 columns corresponding to at_name, x, y, and z coodinates. Check also if there is one extra row in %dth frame\n", + frame_idx); + exit(EXIT_FAILURE); + } + } + + if(cols_i > 4 ){ + if(i != (Trajectory->n_at + 2)*frame_idx){ + fe = fopen("error.dat","w"); + fprintf(fe, "Error. The maximum number of columns allowed is 4. The %dth row of your file contains %d columns\n", i+1, cols_i); + printf("Error. The maximum number of columns allowed is 4. The %dth row of your file contains %d columns\n", i+1, cols_i); + fclose(fe); + exit(EXIT_FAILURE); + } + } + } + + } // END FOR LOOP + + free(string); + //free(token); + free(line); + fclose(ft);// Close trajectory file. + // 1st check: frame_idx must be = frames + if (frame_idx != Trajectory->frames) { + fe = fopen("error.dat", "w"); + fprintf(fe, "Error. Frames completed: %d, while declared frames in parameter file are %d. Trajectory incomplete.\nAborting\n",frame_idx,\ + Trajectory->frames); + fclose(fe); + printf("Error. Frames completed: %d, while declared frames in parameter file are %d. Trajectory incomplete.\nAborting\n",frame_idx,\ + Trajectory->frames); + exit(EXIT_FAILURE); + } + + // final check: frame_idx corresponds with what we expect but the number of row is not the same => It means that + // the frame completed are frame_idx - 1 + else{ + if(rows_i != (Trajectory->n_at + 2)*Trajectory->frames){ + fe = fopen("error.dat", "w"); + fprintf(fe,"Error. Total number of rows: %d, while expected number of rows are %d. Trajectory incomplete.\nAborting\n",rows_i,\ + (Trajectory->n_at + 2)*Trajectory->frames); + fclose(fe); + printf("Error. Total number of rows: %d, while expected number of rows are %d. 
Trajectory incomplete.\nAborting\n", rows_i,\ + (Trajectory->n_at + 2)*Trajectory->frames); + exit(EXIT_FAILURE); + } + } +} + + +int main(int argc, char *argv[]) { + /** + * main file of the program + */ + time_t seconds; + time_t seconds_ref = time(NULL); + printf("Starting program at %s\n", ctime(&seconds_ref)); + + printf("input trajectory %s\n", argv[1]); + printf("core index is %s\n", argv[2]); + printf("number of pairs is %s\n", argv[3]); + printf("starting pair is %s-%s\n", argv[4], argv[5]); + traj *Trajectory = malloc (sizeof(traj)); + + int npairs = atoi(argv[3]); + int start_pair[2]; + start_pair[0] = atoi(argv[4]); + start_pair[1] = atoi(argv[5]); + + int rec_frames = atoi(argv[6]); + int rec_atomnum = atoi(argv[7]); + + Trajectory->frames = rec_frames; + Trajectory->n_at = rec_atomnum; + Trajectory->traj_coords = d2t(rec_frames, 3 * rec_atomnum); + Trajectory->pairs = npairs; + + printf("frames = %d\n", Trajectory->frames); + printf("overall pairs = %d\n", Trajectory->pairs); + + // read trajectory + read_TrajectoryFile(argv[1], Trajectory); + alignments *align = malloc (sizeof(alignments)); + align->rmsd_mat = d1t(Trajectory->pairs); + align->ref_structs = i1t(Trajectory->pairs); + align->mod_structs = i1t(Trajectory->pairs); + align->coms = d2t(rec_frames, 3); + + char out_filename[100]; + if (argc == 10){ + // if there are arguments 8 to 10, it is the ligand trajectory, frames and atomnum + traj *Ligand_Trajectory = malloc (sizeof(traj)); + printf("loading ligand data\n"); + + int ligand_atomnum = atoi(argv[9]); + Ligand_Trajectory->frames = rec_frames; + Ligand_Trajectory->n_at = ligand_atomnum; + Ligand_Trajectory->traj_coords = d2t(rec_frames, 3 * ligand_atomnum); + read_TrajectoryFile(argv[8], Ligand_Trajectory); + cycle_ilrmsd(Trajectory, Ligand_Trajectory, align, start_pair); + sprintf(out_filename, "ilrmsd_%s.matrix", argv[2]); + } + else{ + cycle_rmsd(Trajectory, align, start_pair); + sprintf(out_filename, "rmsd_%s.matrix", argv[2]); + } + // write the rmsd matrix + int i; + // check if file exists + if (access(out_filename, F_OK) != -1){ + printf("Warning: file %s already exists.\n", out_filename); + } + // write to file + FILE *frmsd; + frmsd = fopen(out_filename, "w"); + for (i = 0; i < Trajectory->pairs; i++){ + fprintf(frmsd, "%d %d %.3lf\n",align->ref_structs[i],align->mod_structs[i],align->rmsd_mat[i]); + } + + seconds = time(NULL); + printf("\nOverall execution time: %ld seconds\n", seconds-seconds_ref); + free_d2t(align->coms); + free_d1t(align->rmsd_mat); + free(align); + free_d2t(Trajectory->traj_coords); + free(Trajectory); + return 0; +} diff --git a/src/haddock/fcc/__init__.py b/src/haddock/fcc/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/haddock/fcc/calc_fcc_matrix.py b/src/haddock/fcc/calc_fcc_matrix.py new file mode 100755 index 000000000..c1e7e2749 --- /dev/null +++ b/src/haddock/fcc/calc_fcc_matrix.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Calculates a matrix of fraction of common contacts between two or more structures. 
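+For each pair of structures A and B, the FCC is the number of contacts shared by
+A and B divided by the number of contacts of A (and, for the reverse value, of B),
+giving two asymmetric fractions FCC(A,B) and FCC(B,A).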
+ +Authors: + RODRIGUES Joao + TRELLET Mikael + MELQUIOND Adrien +""" + + +# Contact Parsing routines +def parse_contact_file(f_list, ignore_chain): + """Parses a list of contact files.""" + + if ignore_chain: + contacts = [ + [int(l[0:5] + l[6:-1]) for l in open(f)] for f in f_list if f.strip() + ] + else: + contacts = [set([int(l) for l in open(f)]) for f in f_list if f.strip()] + + return contacts + + +# FCC Calculation Routine +def calculate_fcc(listA, listB): + """ + Calculates the fraction of common elements between two lists + taking into account chain IDs + """ + + cc = len(listA.intersection(listB)) + cc_v = len(listB.intersection(listA)) + + return (cc, cc_v) + + +def calculate_fcc_nc(listA, listB): + """ + Calculates the fraction of common elements between two lists + not taking into account chain IDs. Much Slower. + """ + + largest, smallest = sorted([listA, listB], key=len) + ncommon = len([ele for ele in largest if ele in smallest]) + return (ncommon, ncommon) + + +# Matrix Calculation + + +def calculate_pairwise_matrix(contacts, ignore_chain): + """Calculates a matrix of pairwise fraction of common contacts (FCC). + Outputs numeric indexes. + + contacts: list_of_unique_pairs_of_residues [set/list] + + Returns pairwise matrix as an iterator, each entry in the form: + FCC(cplx_1/cplx_2) FCC(cplx_2/cplx_1) + """ + + contact_lengths = [] + for c in contacts: + try: + ic = 1.0 / len(c) + except ZeroDivisionError: + ic = 0 + contact_lengths.append(ic) + + if ignore_chain: + calc_fcc = calculate_fcc_nc + else: + calc_fcc = calculate_fcc + + for i in range(len(contacts)): + + for k in range(i + 1, len(contacts)): + cc, cc_v = calc_fcc(contacts[i], contacts[k]) + fcc, fcc_v = cc * contact_lengths[i], cc * contact_lengths[k] + yield (i + 1, k + 1, fcc, fcc_v) + + +def _output_fcc(output, values, f_buffer): + + buf = [] + for i in values: + buf.append(i) + if len(buf) == f_buffer: + output( + "".join(["%s %s %1.3f %1.3f\n" % (i[0], i[1], i[2], i[3]) for i in buf]) + ) + buf = [] + output("".join(["%s %s %1.3f %1.3f\n" % (i[0], i[1], i[2], i[3]) for i in buf])) diff --git a/src/haddock/fcc/cluster_fcc.py b/src/haddock/fcc/cluster_fcc.py new file mode 100755 index 000000000..a5fc5e365 --- /dev/null +++ b/src/haddock/fcc/cluster_fcc.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Asymmetric Taylor-Butina Disjoint Clustering Algorithm. + +Authors: + RODRIGUES Joao + TRELLET Mikael + MELQUIOND Adrien +""" + + +class Element(object): + """Defines a 'clusterable' Element""" + + __slots__ = ["name", "cluster", "neighbors"] + + def __init__(self, name): + self.name = name + self.cluster = 0 + self.neighbors = set() + + def add_neighbor(self, neighbor): + """Adds another element to the neighbor list""" + self.neighbors.add(neighbor) + + def assign_cluster(self, clust_id): + """Assigns the Element to Cluster. 0 if unclustered""" + self.cluster = clust_id + + +class Cluster(object): + """Defines a Cluster. A Cluster is created with a name and a center (Element class)""" + + __slots__ = ["name", "center", "members"] + + def __init__(self, name, center): + + self.name = name + self.center = center + + self.members = [] + + self.populate() + + def __len__(self): + return len(self.members) + 1 # +1 Center + + def populate(self): + """ + Populates the Cluster member list through the + neighbor list of its center. 
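+        Only neighbors that are not yet assigned to a cluster become members.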
+        """
+
+        name = self.name
+        # Assign center
+        ctr = self.center
+        ctr.assign_cluster(name)
+
+        mlist = self.members
+        # Assign members
+        ctr_nlist = (n for n in ctr.neighbors if not n.cluster)
+        for e in ctr_nlist:
+            mlist.append(e)
+            e.assign_cluster(name)
+
+    def add_member(self, element):
+        """
+        Adds one single element to the cluster.
+        """
+        l = self.members
+        l.append(element)
+        element.assign_cluster(self.name)
+
+
+def read_matrix(path, cutoff, strictness):
+    """
+    Reads in a four column matrix (1 2 0.123 0.456\n)
+    and creates a dictionary of Elements.
+
+    The strictness factor is a <float> that multiplies the cutoff to produce a
+    new cutoff for the second half of the matrix. It is used to allow some
+    variability while keeping very small interfaces from clustering with
+    anything remotely similar.
+    """
+
+    cutoff = float(cutoff)
+    partner_cutoff = float(cutoff) * float(strictness)
+
+    elements = {}
+
+    f = open(path, "r")
+    for line in f:
+        ref, mobi, dRM, dMR = line.split()
+        ref = int(ref)
+        mobi = int(mobi)
+        dRM = float(dRM)
+        dMR = float(dMR)
+
+        # Create or Retrieve Elements
+        if ref not in elements:
+            r = Element(ref)
+            elements[ref] = r
+        else:
+            r = elements[ref]
+
+        if mobi not in elements:
+            m = Element(mobi)
+            elements[mobi] = m
+        else:
+            m = elements[mobi]
+
+        # Assign neighbors
+        if dRM >= cutoff and dMR >= partner_cutoff:
+            r.add_neighbor(m)
+        if dMR >= cutoff and dRM >= partner_cutoff:
+            m.add_neighbor(r)
+
+    f.close()
+
+    return elements
+
+
+def remove_true_singletons(element_pool):
+    """Removes from the pool elements without any neighbor"""
+
+    ep = element_pool
+
+    ts = set([e for e in ep if not ep[e].neighbors])
+
+    # Remove ts from everybody's neighbor list
+    ts_e = set(ep[e] for e in ts)
+    for e in element_pool:
+        ep[e].neighbors = ep[e].neighbors.difference(ts_e)
+
+    # Remove ts from pool
+    for e in ts:
+        del ep[e]
+
+    return (ts, ep)
+
+
+def cluster_elements(element_pool, threshold):
+    """
+    Groups Elements within a given threshold
+    together in the same cluster.
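+
+    At each iteration the unclustered Element with the largest number of
+    unclustered neighbors is taken as a cluster center and its unclustered
+    neighbors become the cluster members; clustering stops when no remaining
+    Element has enough unclustered neighbors to reach the threshold.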
+ """ + + clusters = [] + threshold -= 1 # Account for center + ep = element_pool + cn = 1 # Cluster Number + while 1: + # Clusterable elements + ce = [e for e in ep if not ep[e].cluster] + if not ce: # No more elements to cluster + break + + # Select Cluster Center + # Element with largest neighbor list + ctr_nlist, ctr = sorted( + [(len([se for se in ep[e].neighbors if not se.cluster]), e) for e in ce] + )[-1] + + # Cluster until length of remaining elements lists are above threshold + if ctr_nlist < threshold: + break + + # Create Cluster + c = Cluster(cn, ep[ctr]) + cn += 1 + clusters.append(c) + + return (ep, clusters) + + +def output_clusters(handle, clusters): + """Outputs the cluster name, center, and members.""" + + write = handle.write + + for c in clusters: + write("Cluster %s -> %s " % (c.name, c.center.name)) + for m in sorted(c.members, key=lambda k: k.name): + write("%s " % m.name) + write("\n") diff --git a/src/haddock/gear/greetings.py b/src/haddock/gear/greetings.py index 10db12a46..df3f2b2d3 100644 --- a/src/haddock/gear/greetings.py +++ b/src/haddock/gear/greetings.py @@ -5,7 +5,7 @@ import sys from datetime import datetime from functools import partial -from typing import Sequence, Callable +from typing import Callable, Sequence from haddock import contact_us, version @@ -32,7 +32,16 @@ "GitHub issues": "https://github.com/haddocking/haddock3/issues", "BioExcel feedback": "https://www.bonvinlab.org/feedback", "BioExcel survey": "https://bioexcel.eu/bioexcel-survey-2024/", - } +} + + +DISCLAIMER = ( + "!! Some of the HADDOCK3 components use CNS (Crystallographic and NMR System)" + f" which is free of use for non-profit applications. !!{os.linesep}" + "!! For commercial use it is your own responsibility" + f" to have a proper license. !!{os.linesep}" + "!! For details refer to the DISCLAIMER file in the HADDOCK3 repository. !!" +) def get_initial_greeting() -> str: @@ -47,6 +56,8 @@ def get_initial_greeting() -> str: f"""# #{os.linesep}""" f"""##############################################{os.linesep}""" f"""{os.linesep}""" + f"""{DISCLAIMER}{os.linesep}""" + f"""{os.linesep}""" f"""Starting HADDOCK {version} on {now}{os.linesep}""" f"""{os.linesep}""" f"""Python {python_version}{os.linesep}""" @@ -81,7 +92,7 @@ def get_goodbye_help() -> str: def gen_feedback_messages(print_function: Callable) -> None: """Print list of feedbacks urls. - + Parameters ---------- print_function : Callable @@ -92,8 +103,8 @@ def gen_feedback_messages(print_function: Callable) -> None: ( "Your feedback matters in Haddock3!" " Share your experience and help us grow:" - ) ) + ) for name, url in feedback_urls.items(): print_function(f"{name}: {url}") diff --git a/src/haddock/modules/analysis/clustfcc/__init__.py b/src/haddock/modules/analysis/clustfcc/__init__.py index a97819e5b..952892866 100644 --- a/src/haddock/modules/analysis/clustfcc/__init__.py +++ b/src/haddock/modules/analysis/clustfcc/__init__.py @@ -7,12 +7,18 @@ For more details please check *Rodrigues, J. P. et al. Proteins: Struct. Funct. Bioinform. 
80, 1810–1817 (2012)* """ # noqa: E501 +import importlib.resources import os from pathlib import Path from haddock import FCC_path, log -from haddock.core.defaults import MODULE_DEFAULT_YAML +from haddock.core.defaults import ( + BINARY_DIR, + CONTACT_FCC_EXEC, + MODULE_DEFAULT_YAML, + ) from haddock.core.typing import Union +from haddock.fcc import calc_fcc_matrix, cluster_fcc from haddock.libs.libclust import ( add_cluster_info, get_cluster_matrix_plot_clt_dt, @@ -56,16 +62,10 @@ def __init__( @classmethod def confirm_installation(cls) -> None: """Confirm if FCC is installed and available.""" - dcfg = read_from_yaml_config(DEFAULT_CONFIG) - exec_path = Path(FCC_path, dcfg["executable"]) - - if not os.access(exec_path, mode=os.F_OK): - raise Exception(f"Required {str(exec_path)} file does not exist.") + # The FCC binary can be either in the default binary path or in the - if not os.access(exec_path, mode=os.X_OK): - raise Exception(f"Required {str(exec_path)} file is not executable") - - return + dcfg = read_from_yaml_config(DEFAULT_CONFIG) + dcfg["executable"] = CONTACT_FCC_EXEC def _run(self) -> None: """Execute module.""" @@ -83,7 +83,7 @@ def _run(self) -> None: job = JobInputFirst( pdb_f, contact_f, - contact_executable, + CONTACT_FCC_EXEC, self.params["contact_distance_cutoff"], ) contact_jobs.append(job) diff --git a/src/haddock/modules/analysis/ilrmsdmatrix/__init__.py b/src/haddock/modules/analysis/ilrmsdmatrix/__init__.py index e6595869c..ce1e64595 100644 --- a/src/haddock/modules/analysis/ilrmsdmatrix/__init__.py +++ b/src/haddock/modules/analysis/ilrmsdmatrix/__init__.py @@ -15,13 +15,14 @@ chains, as no alignment is performed. The user must ensure that the numbering is coherent. """ + import os from pathlib import Path import numpy as np -from haddock import log, RMSD_path -from haddock.core.defaults import MODULE_DEFAULT_YAML +from haddock import RMSD_path, log +from haddock.core.defaults import FAST_RMSDMATRIX_EXEC, MODULE_DEFAULT_YAML from haddock.libs.libalign import ( check_chains, check_common_atoms, @@ -32,32 +33,24 @@ from haddock.libs.libontology import ModuleIO, RMSDFile from haddock.libs.libparallel import get_index_list from haddock.libs.libutil import parse_ncores -from haddock.modules import BaseHaddockModule -from haddock.modules import get_engine +from haddock.modules import BaseHaddockModule, get_engine from haddock.modules.analysis import get_analysis_exec_mode -from haddock.modules.analysis.ilrmsdmatrix.ilrmsd import ( - Contact, - ContactJob, - ) -from haddock.modules.analysis.rmsdmatrix import rmsd_dispatcher, RMSDJob -from haddock.modules.analysis.rmsdmatrix.rmsd import ( - XYZWriter, - XYZWriterJob, - ) +from haddock.modules.analysis.ilrmsdmatrix.ilrmsd import Contact, ContactJob +from haddock.modules.analysis.rmsdmatrix import RMSDJob, rmsd_dispatcher +from haddock.modules.analysis.rmsdmatrix.rmsd import XYZWriter, XYZWriterJob RECIPE_PATH = Path(__file__).resolve().parent DEFAULT_CONFIG = Path(RECIPE_PATH, MODULE_DEFAULT_YAML) -EXEC_PATH = Path(RMSD_path, "src/fast-rmsdmatrix") +EXEC_PATH = FAST_RMSDMATRIX_EXEC class HaddockModule(BaseHaddockModule): """HADDOCK3 module for clustering with RMSD.""" name = RECIPE_PATH.name - - def __init__(self, order, path, *ignore, init_params=DEFAULT_CONFIG, - **everything): + + def __init__(self, order, path, *ignore, init_params=DEFAULT_CONFIG, **everything): super().__init__(order, path, init_params) @classmethod @@ -67,12 +60,10 @@ def confirm_installation(cls) -> None: raise Exception( f"Required 
{str(EXEC_PATH)} file does not exist.{os.linesep}" "Old HADDOCK3 installation? Please follow the new installation instructions at https://github.com/haddocking/haddock3/blob/main/docs/INSTALL.md" # noqa : E501 - ) + ) if not os.access(EXEC_PATH, mode=os.X_OK): - raise Exception( - f"Required {str(EXEC_PATH)} file is not executable" - ) + raise Exception(f"Required {str(EXEC_PATH)} file is not executable") return @@ -82,7 +73,7 @@ def _rearrange_output(output_name, path, ncores): output_fname = Path(path, output_name) log.info(f"rearranging output files into {output_fname}") # Combine files - with open(output_fname, 'w') as out_file: + with open(output_fname, "w") as out_file: for core in range(ncores): tmp_file = Path(path, "ilrmsd_" + str(core) + ".matrix") with open(tmp_file) as infile: @@ -95,7 +86,7 @@ def _rearrange_output(output_name, path, ncores): @staticmethod def _rearrange_contact_output(output_name, path, ncores): """Combine different contact outputs in a single file. - + Parameters ---------- output_name : str @@ -104,7 +95,7 @@ def _rearrange_contact_output(output_name, path, ncores): Path to the output file. ncores : int Number of cores used in the calculation. - + Returns ------- res_resdic_npu : dict @@ -124,7 +115,7 @@ def _rearrange_contact_output(output_name, path, ncores): else: res_resdic[ch].append(resids) tmp_file.unlink() - + # Unique residues npu_resdic = {} for ch in res_resdic.keys(): @@ -134,7 +125,7 @@ def _rearrange_contact_output(output_name, path, ncores): log.info(f"Overall interface residues: {npu_resdic}") # Write to file - with open(output_fname, 'w') as out_file: + with open(output_fname, "w") as out_file: for chain in npu_resdic.keys(): out_file.write(f"{chain} ") out_file.write(" ".join([str(el) for el in npu_resdic[chain]])) @@ -152,36 +143,32 @@ def _run(self) -> None: self.finish_with_error(_e) # Get the models generated in previous step - models = self.previous_io.retrieve_models( - individualize=True - ) + models = self.previous_io.retrieve_models(individualize=True) # Parallelisation : optimal dispatching of models nmodels = len(models) - ncores = parse_ncores(n=self.params['ncores'], njobs=nmodels) - - if nmodels > self.params['max_models']: + ncores = parse_ncores(n=self.params["ncores"], njobs=nmodels) + + if nmodels > self.params["max_models"]: # too many input models : ilRMSD matrix would be too big => Abort! 
raise Exception( f"Too many models ({nmodels} > {self.params['max_models']}) " "for ilRMSD matrix calculation" - ) + ) # find the existing chains model_atoms = get_atoms(models[0]) mod_coord_dic, _ = load_coords( - models[0], - model_atoms, + models[0], + model_atoms, ) obs_chains = np.unique([el[0] for el in mod_coord_dic.keys()]) obs_chains = list(obs_chains) log.info(f"Observed chains: {obs_chains}") # assigning the chains to the receptor and ligand r_chain, l_chains = check_chains( - obs_chains, - self.params["receptor_chain"], - self.params["ligand_chains"] - ) + obs_chains, self.params["receptor_chain"], self.params["ligand_chains"] + ) log.info(f"Receptor chain: {r_chain}") log.info(f"Ligand chains: {l_chains}") self.params["receptor_chain"] = r_chain @@ -194,13 +181,13 @@ def _run(self) -> None: for core in range(ncores): output_name = "interface_contacts_" + str(core) + ".con" contact_obj = Contact( - model_list=models[index_list[core]:index_list[core + 1]], + model_list=models[index_list[core] : index_list[core + 1]], output_name=output_name, core=core, path=Path("."), contact_distance_cutoff=self.params["contact_distance_cutoff"], params=self.params, - ) + ) # running now the ContactJob job_f = Path(output_name) # init RMSDJob @@ -208,7 +195,7 @@ def _run(self) -> None: job_f, self.params, contact_obj, - ) + ) contact_jobs.append(job) exec_mode = get_analysis_exec_mode(self.params["mode"]) @@ -222,23 +209,22 @@ def _run(self) -> None: for job in contact_jobs: if not job.output.exists(): not_found.append(job.output.name) - wrn = f'Contacts were not calculated for {job.output.name}' + wrn = f"Contacts were not calculated for {job.output.name}" log.warning(wrn) else: contact_file_l.append(str(job.output)) - + if not_found: # Not all contacts were calculated, cannot proceed - self.finish_with_error("Several contact files were not generated:" - f" {not_found}") - + self.finish_with_error( + "Several contact files were not generated:" f" {not_found}" + ) + # Post-processing : single file output_name = "receptor_contacts.con" res_resdic = self._rearrange_contact_output( - output_name, - path=contact_obj.path, - ncores=ncores - ) + output_name, path=contact_obj.path, ncores=ncores + ) # if the receptor chain in res_resdic is empty, then the receptor has made no contacts and # the ilrmsd matrix cannot be calculated. This probably means that single chains structures @@ -247,97 +233,83 @@ def _run(self) -> None: _msg = f"No contacts found for receptor chain {r_chain}. Impossible to calculate ilRMSD matrix." _msg += " Please check your input and make sure that there are at least two chains in contact." 
self.finish_with_error(_msg) - + rec_traj_filename = Path("traj_rec.xyz") lig_traj_filename = Path("traj_lig.xyz") - res_resdic_rec = { - k: res_resdic[k] - for k in res_resdic - if k[0] == r_chain - } + res_resdic_rec = {k: res_resdic[k] for k in res_resdic if k[0] == r_chain} # ligand_chains is a list of chains - res_resdic_lig = { - k: res_resdic[k] - for k in l_chains - } + res_resdic_lig = {k: res_resdic[k] for k in l_chains} - log.info(f"Check common atoms for receptor (chain {list(res_resdic_rec.keys())})") + log.info( + f"Check common atoms for receptor (chain {list(res_resdic_rec.keys())})" + ) n_atoms_rec, common_keys_rec = check_common_atoms( models, res_resdic_rec, self.params["allatoms"], - self.params["atom_similarity"] - ) - - log.info(f"Check common atoms for ligand (chains {list(res_resdic_lig.keys())})") + self.params["atom_similarity"], + ) + + log.info( + f"Check common atoms for ligand (chains {list(res_resdic_lig.keys())})" + ) n_atoms_lig, common_keys_lig = check_common_atoms( models, res_resdic_lig, self.params["allatoms"], - self.params["atom_similarity"] - ) - + self.params["atom_similarity"], + ) + xyzwriter_jobs: list[XYZWriterJob] = [] for core in range(ncores): output_name_rec = Path("traj_rec_" + str(core) + ".xyz") # init XYZWriter xyzwriter_obj_rec = XYZWriter( - model_list=models[index_list[core]:index_list[core + 1]], + model_list=models[index_list[core] : index_list[core + 1]], output_name=output_name_rec, core=core, n_atoms=n_atoms_rec, common_keys=common_keys_rec, filter_resdic=res_resdic_rec, allatoms=self.params["allatoms"], - ) + ) # job_rec job_rec = XYZWriterJob( xyzwriter_obj_rec, - ) - + ) + xyzwriter_jobs.append(job_rec) output_name_lig = Path("traj_lig_" + str(core) + ".xyz") # init XYZWriter xyzwriter_obj_lig = XYZWriter( - model_list=models[index_list[core]:index_list[core + 1]], + model_list=models[index_list[core] : index_list[core + 1]], output_name=output_name_lig, core=core, n_atoms=n_atoms_lig, common_keys=common_keys_lig, filter_resdic=res_resdic_lig, allatoms=self.params["allatoms"], - ) + ) # job_lig job_lig = XYZWriterJob( xyzwriter_obj_lig, - ) + ) xyzwriter_jobs.append(job_lig) - + # run jobs engine = Engine(xyzwriter_jobs) engine.run() - rearrange_xyz_files( - rec_traj_filename, - path=Path("."), - ncores=ncores - ) - rearrange_xyz_files( - lig_traj_filename, - path=Path("."), - ncores=ncores - ) + rearrange_xyz_files(rec_traj_filename, path=Path("."), ncores=ncores) + rearrange_xyz_files(lig_traj_filename, path=Path("."), ncores=ncores) # Parallelisation : optimal dispatching of models tot_npairs = nmodels * (nmodels - 1) // 2 - ncores = parse_ncores(n=self.params['ncores'], njobs=tot_npairs) + ncores = parse_ncores(n=self.params["ncores"], njobs=tot_npairs) log.info(f"total number of pairs {tot_npairs}") - npairs, ref_structs, mod_structs = rmsd_dispatcher( - nmodels, - tot_npairs, - ncores) + npairs, ref_structs, mod_structs = rmsd_dispatcher(nmodels, tot_npairs, ncores) # Calculate the rmsd for each set of models ilrmsd_jobs: list[RMSDJob] = [] @@ -358,7 +330,7 @@ def _run(self) -> None: n_atoms_rec, lig_traj_filename, n_atoms_lig, - ) + ) ilrmsd_jobs.append(job) ilrmsd_engine = Engine(ilrmsd_jobs) @@ -371,23 +343,18 @@ def _run(self) -> None: # NOTE: If there is no output, most likely the RMSD calculation # timed out not_found.append(j.output.name) - wrn = f'ilRMSD results were not calculated for {j.output.name}' + wrn = f"ilRMSD results were not calculated for {j.output.name}" log.warning(wrn) else: 
ilrmsd_file_l.append(str(j.output)) if not_found: # Not all distances were calculated, cannot create the full matrix - self.finish_with_error("Several files were not generated:" - f" {not_found}") + self.finish_with_error("Several files were not generated:" f" {not_found}") # Post-processing : single file output_name = "ilrmsd.matrix" - self._rearrange_output( - output_name, - path=Path("."), - ncores=ncores - ) + self._rearrange_output(output_name, path=Path("."), ncores=ncores) # Delete the trajectory files if rec_traj_filename.exists(): os.unlink(rec_traj_filename) @@ -399,9 +366,6 @@ def _run(self) -> None: self.export_io_models() # Sending matrix path to the next step of the workflow matrix_io = ModuleIO() - ilrmsd_matrix_file = RMSDFile( - output_name, - npairs=tot_npairs - ) + ilrmsd_matrix_file = RMSDFile(output_name, npairs=tot_npairs) matrix_io.add(ilrmsd_matrix_file) matrix_io.save(filename="rmsd_matrix.json") diff --git a/src/haddock/modules/analysis/rmsdmatrix/__init__.py b/src/haddock/modules/analysis/rmsdmatrix/__init__.py index 4d8cc5fa5..07e4047ac 100644 --- a/src/haddock/modules/analysis/rmsdmatrix/__init__.py +++ b/src/haddock/modules/analysis/rmsdmatrix/__init__.py @@ -25,34 +25,34 @@ thus telling the module to consider residues from 1 to 4 of chain A and from 2 to 4 of chain B for the alignment and RMSD calculation. """ + import contextlib -from pathlib import Path import os +from pathlib import Path -from haddock import log, RMSD_path -from haddock.core.defaults import MODULE_DEFAULT_YAML +from haddock import RMSD_path, log +from haddock.core.defaults import FAST_RMSDMATRIX_EXEC, MODULE_DEFAULT_YAML from haddock.core.typing import Any, AtomsDict, FilePath -from haddock.libs.libalign import rearrange_xyz_files, check_common_atoms +from haddock.libs.libalign import check_common_atoms, rearrange_xyz_files from haddock.libs.libontology import ModuleIO, RMSDFile from haddock.libs.libparallel import get_index_list from haddock.libs.libutil import parse_ncores -from haddock.modules import BaseHaddockModule -from haddock.modules import get_engine +from haddock.modules import BaseHaddockModule, get_engine from haddock.modules.analysis import ( confirm_resdic_chainid_length, get_analysis_exec_mode, ) from haddock.modules.analysis.rmsdmatrix.rmsd import ( RMSDJob, - rmsd_dispatcher, XYZWriter, - XYZWriterJob + XYZWriterJob, + rmsd_dispatcher, ) RECIPE_PATH = Path(__file__).resolve().parent DEFAULT_CONFIG = Path(RECIPE_PATH, MODULE_DEFAULT_YAML) -EXEC_PATH = Path(RMSD_path, "src/fast-rmsdmatrix") +EXEC_PATH = FAST_RMSDMATRIX_EXEC class HaddockModule(BaseHaddockModule): @@ -60,10 +60,9 @@ class HaddockModule(BaseHaddockModule): name = RECIPE_PATH.name - def __init__(self, - order: int, - path: Path, - initial_params: FilePath = DEFAULT_CONFIG) -> None: + def __init__( + self, order: int, path: Path, initial_params: FilePath = DEFAULT_CONFIG + ) -> None: super().__init__(order, path, initial_params) @classmethod @@ -71,21 +70,24 @@ def confirm_installation(cls) -> None: """Confirm if fast-rmsdmatrix is installed and available.""" if not os.access(EXEC_PATH, mode=os.F_OK): - raise Exception(f"Required {str(EXEC_PATH)} file does not exist.{os.linesep}" - "Old HADDOCK3 installation? Please follow the new installation instructions at https://github.com/haddocking/haddock3/blob/main/docs/INSTALL.md") + raise Exception( + f"Required {str(EXEC_PATH)} file does not exist.{os.linesep}" + "Old HADDOCK3 installation? 
Please follow the new installation instructions at https://github.com/haddocking/haddock3/blob/main/docs/INSTALL.md" + ) if not os.access(EXEC_PATH, mode=os.X_OK): raise Exception(f"Required {str(EXEC_PATH)} file is not executable") return - def _rearrange_output(self, output_name: FilePath, path: FilePath, - ncores: int) -> None: + def _rearrange_output( + self, output_name: FilePath, path: FilePath, ncores: int + ) -> None: """Combine different rmsd outputs in a single file.""" output_fname = Path(path, output_name) self.log(f"rearranging output files into {output_fname}") # Combine files - with open(output_fname, 'w') as out_file: + with open(output_fname, "w") as out_file: for core in range(ncores): tmp_file = Path(path, "rmsd_" + str(core) + ".matrix") with open(tmp_file) as infile: @@ -111,72 +113,65 @@ def _run(self) -> None: self.finish_with_error(_e) # Get the models generated in previous step - models = self.previous_io.retrieve_models( - individualize=True - ) + models = self.previous_io.retrieve_models(individualize=True) nmodels = len(models) if nmodels > self.params["max_models"]: # too many input models : RMSD matrix would be too big => Abort! raise Exception("Too many models for RMSD matrix calculation") - + # index_list for the jobs with linear scaling - ncores = parse_ncores(n=self.params['ncores'], njobs=len(models)) + ncores = parse_ncores(n=self.params["ncores"], njobs=len(models)) index_list = get_index_list(nmodels, ncores) - + traj_filename = Path("traj.xyz") filter_resdic = { - key[-1]: value for key, value - in self.params.items() + key[-1]: value + for key, value in self.params.items() if key.startswith("resdic") - } - + } + # check common atoms - n_atoms, common_keys = check_common_atoms(models, - filter_resdic, - self.params["allatoms"], - self.params["atom_similarity"]) - + n_atoms, common_keys = check_common_atoms( + models, + filter_resdic, + self.params["allatoms"], + self.params["atom_similarity"], + ) + xyzwriter_jobs: list[XYZWriterJob] = [] for core in range(ncores): output_name = Path("traj_" + str(core) + ".xyz") # init RMSDJobFast xyzwriter_obj = XYZWriter( - model_list=models[index_list[core]:index_list[core + 1]], + model_list=models[index_list[core] : index_list[core + 1]], output_name=output_name, core=core, n_atoms=n_atoms, common_keys=common_keys, filter_resdic=filter_resdic, allatoms=self.params["allatoms"], - ) - #job_f = output_name + ) + # job_f = output_name job = XYZWriterJob( xyzwriter_obj, - ) + ) xyzwriter_jobs.append(job) - + # run jobs exec_mode = get_analysis_exec_mode(self.params["mode"]) Engine = get_engine(exec_mode, self.params) engine = Engine(xyzwriter_jobs) engine.run() - rearrange_xyz_files( - traj_filename, - path=Path("."), - ncores=ncores - ) - + rearrange_xyz_files(traj_filename, path=Path("."), ncores=ncores) + # Parallelisation : optimal dispatching of models tot_npairs = nmodels * (nmodels - 1) // 2 log.info(f"total number of pairs {tot_npairs}") - ncores = parse_ncores(n=self.params['ncores'], njobs=tot_npairs) - npairs, ref_structs, mod_structs = rmsd_dispatcher( - nmodels, - tot_npairs, - ncores) + ncores = parse_ncores(n=self.params["ncores"], njobs=tot_npairs) + npairs, ref_structs, mod_structs = rmsd_dispatcher(nmodels, tot_npairs, ncores) # Calculate the rmsd for each set of models rmsd_jobs: list[RMSDJob] = [] @@ -194,9 +189,9 @@ def _run(self) -> None: mod_structs[core], len(models), n_atoms, - ) + ) rmsd_jobs.append(job) - + engine = Engine(rmsd_jobs) engine.run() @@ -207,23 +202,18 @@ def _run(self) -> 
None: # NOTE: If there is no output, most likely the RMSD calculation # timed out not_found.append(job.output.name) - wrn = f'Rmsd results were not calculated for {job.output.name}' + wrn = f"Rmsd results were not calculated for {job.output.name}" log.warning(wrn) else: rmsd_file_l.append(str(job.output)) if not_found: # Not all distances were calculated, cannot create the full matrix - self.finish_with_error("Several files were not generated:" - f" {not_found}") + self.finish_with_error("Several files were not generated:" f" {not_found}") # Post-processing : single file final_output_name = "rmsd.matrix" - self._rearrange_output( - final_output_name, - path=Path("."), - ncores=ncores - ) + self._rearrange_output(final_output_name, path=Path("."), ncores=ncores) # Delete the trajectory file if traj_filename.exists(): os.unlink(traj_filename) @@ -233,9 +223,6 @@ def _run(self) -> None: self.export_io_models() # Sending matrix path to the next step of the workflow matrix_io = ModuleIO() - rmsd_matrix_file = RMSDFile( - final_output_name, - npairs=tot_npairs - ) + rmsd_matrix_file = RMSDFile(final_output_name, npairs=tot_npairs) matrix_io.add(rmsd_matrix_file) matrix_io.save(filename="rmsd_matrix.json") diff --git a/src/haddock/modules/topology/topoaa/__init__.py b/src/haddock/modules/topology/topoaa/__init__.py index b8b5ed111..29f556b3d 100644 --- a/src/haddock/modules/topology/topoaa/__init__.py +++ b/src/haddock/modules/topology/topoaa/__init__.py @@ -29,7 +29,7 @@ from functools import partial from pathlib import Path -from haddock.core.defaults import MODULE_DEFAULT_YAML +from haddock.core.defaults import MODULE_DEFAULT_YAML, cns_exec from haddock.core.typing import FilePath, Optional, ParamDict, ParamMap, Union from haddock.libs import libpdb from haddock.libs.libcns import ( @@ -37,7 +37,7 @@ load_workflow_params, prepare_output, prepare_single_input, -) + ) from haddock.libs.libontology import Format, PDBFile, TopologyFile from haddock.libs.libstructure import make_molecules from haddock.libs.libsubprocess import CNSJob @@ -262,13 +262,12 @@ def _run(self) -> None: # Add new job to the pool output_filename = Path(f"{model.stem}.{Format.CNS_OUTPUT}") err_fname = f"{model.stem}.cnserr" - job = CNSJob( topoaa_input, output_filename, err_fname, envvars=self.envvars, - cns_exec=self.params["cns_exec"], + cns_exec=cns_exec, ) jobs.append(job)
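
Note on the parallelisation pattern used in the rmsdmatrix and ilrmsdmatrix hunks above: both modules compute tot_npairs = nmodels * (nmodels - 1) // 2 and let rmsd_dispatcher hand each core a share of those pairs before creating one RMSDJob per core. The snippet below is only a minimal, hypothetical sketch of that dispatching idea using the standard library; split_pairs is an invented name and is not the actual rmsd_dispatcher shipped with haddock3.

# Illustrative sketch only (assumption: NOT the haddock3 rmsd_dispatcher).
# It shows how the nmodels * (nmodels - 1) // 2 unique model pairs of an
# (il)RMSD matrix can be split evenly over the available cores, which is
# what the modules above do before launching one RMSDJob per core.
from itertools import combinations


def split_pairs(nmodels: int, ncores: int) -> list[list[tuple[int, int]]]:
    """Chunk every unique (reference, mobile) model-index pair across cores."""
    pairs = list(combinations(range(nmodels), 2))  # nmodels*(nmodels-1)//2 pairs
    base, extra = divmod(len(pairs), ncores)
    chunks, start = [], 0
    for core in range(ncores):
        size = base + (1 if core < extra else 0)  # spread the remainder evenly
        chunks.append(pairs[start:start + size])
        start += size
    return chunks


if __name__ == "__main__":
    # 10 models -> 45 pairs, dispatched over 4 cores as 12 + 11 + 11 + 11
    for core, chunk in enumerate(split_pairs(10, 4)):
        print(f"core {core}: {len(chunk)} pairs")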