diff --git a/.github/PULL_REQUEST_TEMPLATE/release_template.md b/.github/PULL_REQUEST_TEMPLATE/release_template.md index 7b3585114..1c5020590 100644 --- a/.github/PULL_REQUEST_TEMPLATE/release_template.md +++ b/.github/PULL_REQUEST_TEMPLATE/release_template.md @@ -9,6 +9,7 @@ Make the PR: * [ ] Verify that`docs/CHANGELOG.md` looks correct. * [ ] Make the PR and verify that CI/CD passes. * [ ] Merge the PR into `main`. +* [ ] Make a PR into the [example notebooks repository](https://github.com/OpenFreeEnergy/ExampleNotebooks) to update the version used in `showcase/openfe_showcase.ipynb` and `.binder/environment.yml` After Merging the PR [follow this guide](https://github.com/OpenFreeEnergy/openfe/wiki/How-to-create-a-new-release) diff --git a/.github/workflows/change.yaml b/.github/workflows/change.yaml new file mode 100644 index 000000000..acc25733a --- /dev/null +++ b/.github/workflows/change.yaml @@ -0,0 +1,41 @@ +name: Check for API breaks + +on: + pull_request_target: + branches: + - main + +jobs: + check: + runs-on: ubuntu-latest + permissions: + pull-requests: write + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Check for API breaks + continue-on-error: true + id: check + run: | + pip install griffe + griffe check "openfe" --verbose + + - name: Post Comment on Failure + if: steps.check.outcome == 'failure' + uses: actions/github-script@v7 + with: + script: | + const prNumber = context.payload.pull_request.number; + github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body: '🚨 API breaking changes detected! 🚨' + }); diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d7bd615b6..2a54bd2e7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -30,7 +30,7 @@ defaults: jobs: tests: runs-on: ${{ matrix.os }} - name: "💻-${{matrix.os }} 🐍-${{ matrix.python-version }} 🗃️${{ matrix.pydantic-version }} oechem: ${{ matrix.openeye }}" + name: "💻-${{matrix.os }} 🐍-${{ matrix.python-version }} 🗃️${{ matrix.pydantic-version }} oechem-${{ matrix.openeye }} gufe-${{ matrix.gufe-version }}" strategy: fail-fast: false matrix: @@ -41,22 +41,34 @@ jobs: - "3.11" - "3.12" openeye: ["no"] + gufe-version: ['a9b5982471eb3644ca8c4423c7001e6309a3f544'] include: - - os: "macos-12" + - os: "macos-latest" python-version: "3.12" pydantic-version: ">1" + gufe-version: 'a9b5982471eb3644ca8c4423c7001e6309a3f544' - os: "ubuntu-latest" python-version: "3.11" pydantic-version: "<2" + gufe-version: 'a9b5982471eb3644ca8c4423c7001e6309a3f544' - os: "ubuntu-latest" python-version: "3.11" pydantic-version: ">1" openeye: "yes" + gufe-version: 'a9b5982471eb3644ca8c4423c7001e6309a3f544' - os: "macos-latest" python-version: "3.12" pydantic-version: ">1" omff-version: ">0.13" openeye: ["no"] + gufe-version: 'a9b5982471eb3644ca8c4423c7001e6309a3f544' + + # temporary test against gufe main while we're pinned to a9b5 + - os: "ubuntu-latest" + python-version: "3.11" + pydantic-version: ">1" + openeye: "yes" + gufe-version: 'main' env: OE_LICENSE: ${{ github.workspace }}/oe_license.txt @@ -96,8 +108,9 @@ jobs: micromamba install -c openeye openeye-toolkits python -c "import openeye; assert openeye.oechem.OEChemIsLicensed(), 'oechem license check failed!'" - - name: "Install GUFE from main@HEAD" - run: python -m pip install --no-deps git+https://github.com/OpenFreeEnergy/gufe@main + - name: "Install GUFE at {{ matrix.gufe-version }}" + if: ${{ matrix.gufe-version != 'main' }} + run: python -m pip install --no-deps git+https://github.com/OpenFreeEnergy/gufe@${{ matrix.gufe-version }} - name: "Install" run: python -m pip install --no-deps -e . diff --git a/.github/workflows/conda_cron.yaml b/.github/workflows/conda_cron.yaml index 0aa542198..af4eaa535 100644 --- a/.github/workflows/conda_cron.yaml +++ b/.github/workflows/conda_cron.yaml @@ -20,15 +20,11 @@ jobs: strategy: fail-fast: false matrix: - os: ['ubuntu-latest', 'macos-latest', 'macos-12'] + os: ['ubuntu-latest', 'macos-latest'] python-version: - - "3.9" - "3.10" - "3.11" - "3.12" - exclude: - - os: 'macos-latest' - python-version: '3.9' steps: - name: Setup Micromamba diff --git a/.github/workflows/installer.yaml b/.github/workflows/installer.yaml index 4fac5c433..bbe0dbfed 100644 --- a/.github/workflows/installer.yaml +++ b/.github/workflows/installer.yaml @@ -14,7 +14,7 @@ jobs: strategy: fail-fast: false matrix: - os: [macos-14, macos-12, ubuntu-latest] + os: [macos-latest, ubuntu-latest] steps: - name: Checkout Code diff --git a/news/986_input_validation.rst b/news/986_input_validation.rst new file mode 100644 index 000000000..adeed8591 --- /dev/null +++ b/news/986_input_validation.rst @@ -0,0 +1,23 @@ +**Added:** + +* + +**Changed:** + +* ``openfe quickrun`` now fails up-front when the user-supplied output file (``-o``) already exists or has a non-existent parent directory. + +**Deprecated:** + +* + +**Removed:** + +* + +**Fixed:** + +* + +**Security:** + +* diff --git a/news/compute_selection_fixes.rst b/news/compute_selection_fixes.rst new file mode 100644 index 000000000..6ab5f6c7c --- /dev/null +++ b/news/compute_selection_fixes.rst @@ -0,0 +1,26 @@ +**Added:** + +* OpenMMEngineSettings now has a `gpu_device_index` attribute + allowing users to pass through a list of ints to select the + GPU devices to run their simulations on. + +**Changed:** + +* `openfe.protocols.openmm_rfe._rfe_utils.compute` has been moved + to `openfe.protocols.openmm_utils.omm_compute`. + +**Deprecated:** + +* + +**Removed:** + +* + +**Fixed:** + +* OpenMM CPU vacuum calculations now enforce the use of a single CPU to avoid large performance losses. + +**Security:** + +* diff --git a/news/no_more_macos12.rst b/news/no_more_macos12.rst new file mode 100644 index 000000000..88e2dc8bf --- /dev/null +++ b/news/no_more_macos12.rst @@ -0,0 +1,23 @@ +**Added:** + +* + +**Changed:** + +* + +**Deprecated:** + +* + +**Removed:** + +* openfe is no longer tested against macos-12. macos support is, for now, limited to osx-arm64 (macos-14+). + +**Fixed:** + +* + +**Security:** + +* diff --git a/openfe/analysis/plotting.py b/openfe/analysis/plotting.py index da880dc85..c7f3aad8f 100644 --- a/openfe/analysis/plotting.py +++ b/openfe/analysis/plotting.py @@ -33,7 +33,7 @@ def plot_lambda_transition_matrix(matrix: npt.NDArray) -> Axes: Notes ----- Borrowed from `alchemlyb `_ - which itself borrows from `alchemical-analysis `_. + which itself borrows from `alchemical-analysis `_. """ num_states = len(matrix) diff --git a/openfe/protocols/openmm_afe/base.py b/openfe/protocols/openmm_afe/base.py index 4e201d220..633ec884a 100644 --- a/openfe/protocols/openmm_afe/base.py +++ b/openfe/protocols/openmm_afe/base.py @@ -58,12 +58,13 @@ MultiStateSimulationSettings, OpenMMEngineSettings, IntegratorSettings, LambdaSettings, MultiStateOutputSettings, ThermoSettings, OpenFFPartialChargeSettings, + OpenMMSystemGeneratorFFSettings, ) -from openfe.protocols.openmm_rfe._rfe_utils import compute from openfe.protocols.openmm_md.plain_md_methods import PlainMDProtocolUnit from ..openmm_utils import ( settings_validation, system_creation, - multistate_analysis, charge_generation + multistate_analysis, charge_generation, + omm_compute, ) from openfe.utils import without_oechem_backend @@ -175,6 +176,7 @@ def _pre_equilibrate( settings : dict[str, SettingsBaseModel] A dictionary of settings objects. Expects the following entries: + * `forcefield_settings` * `engine_settings` * `thermo_settings` * `integrator_settings` @@ -189,8 +191,12 @@ def _pre_equilibrate( Equilibrated system positions """ # Prep the simulation object - platform = compute.get_openmm_platform( - settings['engine_settings'].compute_platform + # Restrict CPU count if running vacuum simulation + restrict_cpu = settings['forcefield_settings'].nonbonded_method.lower() == 'nocutoff' + platform = omm_compute.get_openmm_platform( + platform_name=settings['engine_settings'].compute_platform, + gpu_device_index=settings['engine_settings'].gpu_device_index, + restrict_cpu_count=restrict_cpu ) integrator = openmm.LangevinMiddleIntegrator( @@ -710,6 +716,7 @@ def _get_reporter( def _get_ctx_caches( self, + forcefield_settings: OpenMMSystemGeneratorFFSettings, engine_settings: OpenMMEngineSettings ) -> tuple[openmmtools.cache.ContextCache, openmmtools.cache.ContextCache]: """ @@ -717,7 +724,8 @@ def _get_ctx_caches( Parameters ---------- - engine_settings : OpenMMEngineSettings, + forcefield_settings: OpenMMSystemGeneratorFFSettings + engine_settings : OpenMMEngineSettings Returns ------- @@ -726,8 +734,13 @@ def _get_ctx_caches( sampler_context_cache : openmmtools.cache.ContextCache The sampler state context cache. """ - platform = compute.get_openmm_platform( - engine_settings.compute_platform, + # Get the compute platform + # Set the number of CPUs to 1 if running a vacuum simulation + restrict_cpu = forcefield_settings.nonbonded_method.lower() == 'nocutoff' + platform = omm_compute.get_openmm_platform( + platform_name=engine_settings.compute_platform, + gpu_device_index=engine_settings.gpu_device_index, + restrict_cpu_count=restrict_cpu ) energy_context_cache = openmmtools.cache.ContextCache( @@ -1026,6 +1039,7 @@ def run(self, dry=False, verbose=True, try: # 12. Get context caches energy_ctx_cache, sampler_ctx_cache = self._get_ctx_caches( + settings['forcefield_settings'], settings['engine_settings'] ) diff --git a/openfe/protocols/openmm_md/plain_md_methods.py b/openfe/protocols/openmm_md/plain_md_methods.py index 1ac147714..25ae1953b 100644 --- a/openfe/protocols/openmm_md/plain_md_methods.py +++ b/openfe/protocols/openmm_md/plain_md_methods.py @@ -43,10 +43,9 @@ ) from openff.toolkit.topology import Molecule as OFFMolecule -from openfe.protocols.openmm_rfe._rfe_utils import compute from openfe.protocols.openmm_utils import ( system_validation, settings_validation, system_creation, - charge_generation, + charge_generation, omm_compute ) logger = logging.getLogger(__name__) @@ -623,8 +622,11 @@ def run(self, *, dry=False, verbose=True, ) # 10. Get platform - platform = compute.get_openmm_platform( - protocol_settings.engine_settings.compute_platform + restrict_cpu = forcefield_settings.nonbonded_method.lower() == 'nocutoff' + platform = omm_compute.get_openmm_platform( + platform_name=protocol_settings.engine_settings.compute_platform, + gpu_device_index=protocol_settings.engine_settings.gpu_device_index, + restrict_cpu_count=restrict_cpu ) # 11. Set the integrator diff --git a/openfe/protocols/openmm_rfe/_rfe_utils/__init__.py b/openfe/protocols/openmm_rfe/_rfe_utils/__init__.py index 81fb48a67..b8cca820d 100644 --- a/openfe/protocols/openmm_rfe/_rfe_utils/__init__.py +++ b/openfe/protocols/openmm_rfe/_rfe_utils/__init__.py @@ -1,5 +1,4 @@ from . import ( - compute, lambdaprotocol, multistate, relative, diff --git a/openfe/protocols/openmm_rfe/equil_rfe_methods.py b/openfe/protocols/openmm_rfe/equil_rfe_methods.py index ae3f19619..2b80370e0 100644 --- a/openfe/protocols/openmm_rfe/equil_rfe_methods.py +++ b/openfe/protocols/openmm_rfe/equil_rfe_methods.py @@ -61,7 +61,7 @@ ) from ..openmm_utils import ( system_validation, settings_validation, system_creation, - multistate_analysis, charge_generation + multistate_analysis, charge_generation, omm_compute, ) from . import _rfe_utils from ...utils import without_oechem_backend, log_system_probe @@ -933,9 +933,13 @@ def run(self, *, dry=False, verbose=True, bfactors=bfactors, ) - # 10. Get platform - platform = _rfe_utils.compute.get_openmm_platform( - protocol_settings.engine_settings.compute_platform + # 10. Get compute platform + # restrict to a single CPU if running vacuum + restrict_cpu = forcefield_settings.nonbonded_method.lower() == 'nocutoff' + platform = omm_compute.get_openmm_platform( + platform_name=protocol_settings.engine_settings.compute_platform, + gpu_device_index=protocol_settings.engine_settings.gpu_device_index, + restrict_cpu_count=restrict_cpu ) # 11. Set the integrator diff --git a/openfe/protocols/openmm_rfe/_rfe_utils/compute.py b/openfe/protocols/openmm_utils/omm_compute.py similarity index 55% rename from openfe/protocols/openmm_rfe/_rfe_utils/compute.py rename to openfe/protocols/openmm_utils/omm_compute.py index b3bee28f6..af48a96a6 100644 --- a/openfe/protocols/openmm_rfe/_rfe_utils/compute.py +++ b/openfe/protocols/openmm_utils/omm_compute.py @@ -1,23 +1,40 @@ # This code is part of OpenFE and is licensed under the MIT license. # For details, see https://github.com/OpenFreeEnergy/openfe # Adapted Perses' perses.app.setup_relative_calculation.get_openmm_platform +from typing import Optional import warnings import logging +import os logger = logging.getLogger(__name__) -def get_openmm_platform(platform_name=None): +def get_openmm_platform( + platform_name: Optional[str] = None, + gpu_device_index: Optional[list[int]] = None, + restrict_cpu_count: bool = False +): """ Return OpenMM's platform object based on given name. Setting to mixed precision if using CUDA or OpenCL. Parameters ---------- - platform_name : str, optional, default=None + platform_name : Optional[str] String with the platform name. If None, it will use the fastest platform supporting mixed precision. + Default ``None``. + gpu_device_index : Optional[list[str]] + GPU device index selection. If ``None`` the default OpenMM + GPU selection will be used. + See the `OpenMM platform properties documentation `_ + for more details. + Default ``None``. + restrict_cpu_count : bool + Optional hint to restrict the CPU count to 1 when + ``platform_name`` is CPU. This allows Protocols to ensure + that no large performance in cases like vacuum simulations. Returns ------- @@ -44,16 +61,23 @@ def get_openmm_platform(platform_name=None): # Set precision and properties name = platform.getName() if name in ['CUDA', 'OpenCL']: - platform.setPropertyDefaultValue( - 'Precision', 'mixed') + platform.setPropertyDefaultValue('Precision', 'mixed') + if gpu_device_index is not None: + index_list = ','.join(str(i) for i in gpu_device_index) + platform.setPropertyDefaultValue('DeviceIndex', index_list) + if name == 'CUDA': platform.setPropertyDefaultValue( 'DeterministicForces', 'true') if name != 'CUDA': - wmsg = (f"Non-GPU platform selected: {name}, this may significantly " + wmsg = (f"Non-CUDA platform selected: {name}, this may significantly " "impact simulation performance") warnings.warn(wmsg) logging.warning(wmsg) + if name == 'CPU' and restrict_cpu_count: + threads = os.getenv("OPENMM_CPU_THREADS", '1') + platform.setPropertyDefaultValue('Threads', threads) + return platform diff --git a/openfe/protocols/openmm_utils/omm_settings.py b/openfe/protocols/openmm_utils/omm_settings.py index d52db9b41..63cb5789c 100644 --- a/openfe/protocols/openmm_utils/omm_settings.py +++ b/openfe/protocols/openmm_utils/omm_settings.py @@ -311,6 +311,18 @@ class OpenMMEngineSettings(SettingsBaseModel): OpenMM compute platform to perform MD integration with. If ``None``, will choose fastest available platform. Default ``None``. """ + gpu_device_index: Optional[list[int]] = None + """ + List of integer indices for the GPU device to select when + ``compute_platform`` is either set to ``CUDA`` or ``OpenCL``. + + If ``None``, the default OpenMM GPU selection behaviour is used. + + See the `OpenMM platform properties documentation `_ + for more details. + + Default ``None``. + """ class IntegratorSettings(SettingsBaseModel): diff --git a/openfe/tests/protocols/test_openmm_equil_rfe_protocols.py b/openfe/tests/protocols/test_openmm_equil_rfe_protocols.py index a0dcb3b66..9b3183585 100644 --- a/openfe/tests/protocols/test_openmm_equil_rfe_protocols.py +++ b/openfe/tests/protocols/test_openmm_equil_rfe_protocols.py @@ -38,15 +38,15 @@ from openfe.protocols.openmm_rfe.equil_rfe_methods import ( _validate_alchemical_components, _get_alchemical_charge_difference ) -from openfe.protocols.openmm_utils import system_creation +from openfe.protocols.openmm_utils import system_creation, omm_compute from openfe.protocols.openmm_utils.charge_generation import ( HAS_NAGL, HAS_OPENEYE, HAS_ESPALOMA ) def test_compute_platform_warn(): - with pytest.warns(UserWarning, match="Non-GPU platform selected: CPU"): - openmm_rfe._rfe_utils.compute.get_openmm_platform('CPU') + with pytest.warns(UserWarning, match="Non-CUDA platform selected: CPU"): + omm_compute.get_openmm_platform('CPU') def test_append_topology(benzene_complex_system, toluene_complex_system): diff --git a/openfe/tests/protocols/test_rfe_tokenization.py b/openfe/tests/protocols/test_rfe_tokenization.py index 07d6c3c4b..1e765ca0d 100644 --- a/openfe/tests/protocols/test_rfe_tokenization.py +++ b/openfe/tests/protocols/test_rfe_tokenization.py @@ -39,13 +39,20 @@ def instance(self): class TestRelativeHybridTopologyProtocol(GufeTokenizableTestsMixin): cls = openmm_rfe.RelativeHybridTopologyProtocol - key = "RelativeHybridTopologyProtocol-fbc7c8ac0f58808ad4430a155453932f" - repr = f"<{key}>" + key = None + repr = "" + key = None + repr = "AbsoluteSolvationProtocol-" @pytest.fixture() def instance(self, protocol): return protocol + def test_repr(self, instance): + """ + Overwrites the base `test_repr` call. + """ + assert isinstance(repr(instance), str) + assert self.repr in repr(instance) + class TestAbsoluteSolvationSolventUnit(GufeTokenizableTestsMixin): cls = openmm_afe.AbsoluteSolvationSolventUnit @@ -93,9 +100,16 @@ def test_repr(self, instance): class TestAbsoluteSolvationProtocolResult(GufeTokenizableTestsMixin): cls = openmm_afe.AbsoluteSolvationProtocolResult - key = "AbsoluteSolvationProtocolResult-7f80c1cf5a526bde45d385cee7352428" - repr = f"<{key}>" + key = None + repr = "AbsoluteSolvationProtocolResult-" @pytest.fixture() def instance(self, protocol_result): return protocol_result + + def test_repr(self, instance): + """ + Overwrites the base `test_repr` call. + """ + assert isinstance(repr(instance), str) + assert self.repr in repr(instance) diff --git a/openfe/utils/handle_trajectories.py b/openfe/utils/handle_trajectories.py index a10d376ee..1341dbeec 100644 --- a/openfe/utils/handle_trajectories.py +++ b/openfe/utils/handle_trajectories.py @@ -78,7 +78,7 @@ def _create_new_dataset(filename: Path, n_atoms: int, AMBER Conventions compliant NetCDF dataset to store information contained in MultiState reporter generated NetCDF file. """ - ncfile = nc.Dataset(filename, 'w', format='NETCDF3_64BIT') + ncfile = nc.Dataset(filename, 'w', format='NETCDF3_64BIT_OFFSET') ncfile.Conventions = 'AMBER' ncfile.ConventionVersion = "1.0" ncfile.application = "openfe" diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index 165e9a127..52a16a2b2 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -2,13 +2,29 @@ # For details, see https://github.com/OpenFreeEnergy/openfe import click -from openfecli import OFECommandPlugin -from openfecli.clicktypes import HyphenAwareChoice +import os import pathlib +from typing import Callable, Literal import warnings +from openfecli import OFECommandPlugin +from openfecli.clicktypes import HyphenAwareChoice + -def _get_column(val): +def _get_column(val:float|int)->int: + """Determine the index (where the 0th index is the decimal) at which the + first non-zero value occurs in a full-precision string representation of a value. + + Parameters + ---------- + val : float|int + The raw value. + + Returns + ------- + int + Column index + """ import numpy as np if val == 0: return 0 @@ -27,6 +43,23 @@ def format_estimate_uncertainty( unc: float, unc_prec: int = 1, ) -> tuple[str, str]: + """Truncate raw estimate and uncertainty values to the appropriate uncertainty. + + Parameters + ---------- + est : float + Raw estimate value. + unc : float + Raw uncertainty value. + unc_prec : int, optional + Precision, by default 1 + + Returns + ------- + tuple[str, str] + The truncated raw and uncertainty values. + """ + import numpy as np # get the last column needed for uncertainty unc_col = _get_column(unc) - (unc_prec - 1) @@ -41,21 +74,45 @@ def format_estimate_uncertainty( return est_str, unc_str -def is_results_json(f): - # sanity check on files before we try and deserialize - return 'estimate' in open(f, 'r').read(20) +def is_results_json(fpath:os.PathLike|str)->bool: + """Sanity check that file is a result json before we try to deserialize""" + return 'estimate' in open(fpath, 'r').read(20) -def load_results(f): - # path to deserialized results +def load_results(fpath:os.PathLike|str)->dict: + """Load the data from a results JSON into a dict + + Parameters + ---------- + fpath : os.PathLike | str + The path to deserialized results. + + Returns + ------- + dict + A dict containing data from the results JSON. + """ + import json from gufe.tokenization import JSON_HANDLER - return json.load(open(f, 'r'), cls=JSON_HANDLER.decoder) + return json.load(open(fpath, 'r'), cls=JSON_HANDLER.decoder) + +def get_names(result:dict) -> tuple[str, str]: + """Get the ligand names from a unit's results data. + + Parameters + ---------- + result : dict + A results dict. + + Returns + ------- + tuple[str, str] + Ligand names corresponding to the results. + """ -def get_names(result) -> tuple[str, str]: - # Result to tuple of ligand names nm = list(result['unit_results'].values())[0]['name'] toks = nm.split() if toks[2] == 'repeat': @@ -64,7 +121,10 @@ def get_names(result) -> tuple[str, str]: return toks[0], toks[2] -def get_type(res): +def get_type(res:dict)->Literal['vacuum','solvent','complex']: + """Determine the simulation type based on the component names.""" + # TODO: use component *types* instead here + list_of_pur = list(res['protocol_result']['data'].values())[0] pur = list_of_pur[0] components = pur['inputs']['stateA']['components'] @@ -77,7 +137,9 @@ def get_type(res): return 'solvent' -def legacy_get_type(res_fn): +def legacy_get_type(res_fn:os.PathLike|str)->Literal['vacuum','solvent','complex']: + # TODO: Deprecate this when we no longer rely on key names in `get_type()` + if 'solvent' in res_fn: return 'solvent' elif 'vacuum' in res_fn: @@ -86,7 +148,7 @@ def legacy_get_type(res_fn): return 'complex' -def _generate_bad_legs_error_message(set_vals, ligpair): +def _generate_bad_legs_error_message(set_vals:set, ligpair)->str: expected_rbfe = {'complex', 'solvent'} expected_rhfe = {'solvent', 'vacuum'} maybe_rhfe = bool(set_vals & expected_rhfe) @@ -139,7 +201,7 @@ def _parse_raw_units(results: dict) -> list[tuple]: for pu in list_of_pur] -def _get_ddgs(legs, error_on_missing=True): +def _get_ddgs(legs:dict, error_on_missing=True): import numpy as np DDGs = [] for ligpair, vals in sorted(legs.items()): @@ -179,7 +241,7 @@ def _get_ddgs(legs, error_on_missing=True): return DDGs -def _write_ddg(legs, writer, allow_partial): +def _write_ddg(legs:dict, writer:Callable, allow_partial:bool): DDGs = _get_ddgs(legs, error_on_missing=not allow_partial) writer.writerow(["ligand_i", "ligand_j", "DDG(i->j) (kcal/mol)", "uncertainty (kcal/mol)"]) @@ -192,7 +254,7 @@ def _write_ddg(legs, writer, allow_partial): writer.writerow([ligA, ligB, DDGhyd, hyd_unc]) -def _write_raw(legs, writer, allow_partial=True): +def _write_raw(legs:dict, writer:Callable, allow_partial=True): writer.writerow(["leg", "ligand_i", "ligand_j", "DG(i->j) (kcal/mol)", "MBAR uncertainty (kcal/mol)"]) @@ -206,7 +268,7 @@ def _write_raw(legs, writer, allow_partial=True): writer.writerow([simtype, *ligpair, m, u]) -def _write_dg_raw(legs, writer, allow_partial): # pragma: no-cover +def _write_dg_raw(legs:dict, writer:Callable, allow_partial): # pragma: no-cover writer.writerow(["leg", "ligand_i", "ligand_j", "DG(i->j) (kcal/mol)", "uncertainty (kcal/mol)"]) for ligpair, vals in sorted(legs.items()): @@ -218,7 +280,7 @@ def _write_dg_raw(legs, writer, allow_partial): # pragma: no-cover writer.writerow([simtype, *ligpair, m, u]) -def _write_dg_mle(legs, writer, allow_partial): +def _write_dg_mle(legs:dict, writer:Callable, allow_partial:bool): import networkx as nx import numpy as np from cinnabar.stats import mle @@ -299,7 +361,11 @@ def _write_dg_mle(legs, writer, allow_partial): "(Skip those edges and issue warning instead.)" ) ) -def gather(rootdir, output, report, allow_partial): +def gather(rootdir:os.PathLike|str, + output:os.PathLike|str, + report:Literal['dg','ddg','raw'], + allow_partial:bool + ): """Gather simulation result jsons of relative calculations to a tsv file This walks ROOTDIR recursively and finds all result JSON files from the diff --git a/openfecli/commands/quickrun.py b/openfecli/commands/quickrun.py index dcd6f9d33..c7dd6f833 100644 --- a/openfecli/commands/quickrun.py +++ b/openfecli/commands/quickrun.py @@ -6,7 +6,7 @@ import pathlib from openfecli import OFECommandPlugin -from openfecli.parameters.output import ensure_file_does_not_exist +from openfecli.parameters.output import validate_outfile from openfecli.utils import write, print_duration, configure_logger @@ -33,10 +33,9 @@ def _format_exception(exception) -> str: ) @click.option( 'output', '-o', default=None, - type=click.Path(dir_okay=False, file_okay=True, writable=True, - path_type=pathlib.Path), - help="output file (JSON format) for the final results", - callback=ensure_file_does_not_exist, + type=click.Path(dir_okay=False, file_okay=True, path_type=pathlib.Path), + help="Filepath at which to create and write the JSON-formatted results.", + callback=validate_outfile, ) @print_duration def quickrun(transformation, work_dir, output): diff --git a/openfecli/parameters/output.py b/openfecli/parameters/output.py index a82652883..c943537df 100644 --- a/openfecli/parameters/output.py +++ b/openfecli/parameters/output.py @@ -9,11 +9,19 @@ def get_file_and_extension(user_input, context): return file, ext -def ensure_file_does_not_exist(ctx, param, value): +def ensure_file_does_not_exist(value): + # TODO: I believe we can replace this with click.option(file_okay=False) if value and value.exists(): raise click.BadParameter(f"File '{value}' already exists.") - return value +def ensure_parent_dir_exists(value): + if value and not value.parent.is_dir(): + raise click.BadParameter(f"Cannot write to {value}, parent directory does not exist.") + +def validate_outfile(ctx, param, value): + ensure_file_does_not_exist(value) + ensure_parent_dir_exists(value) + return value OUTPUT_FILE_AND_EXT = Option( "-o", "--output", diff --git a/openfecli/tests/commands/test_gather.py b/openfecli/tests/commands/test_gather.py index 149de4b84..6508763cf 100644 --- a/openfecli/tests/commands/test_gather.py +++ b/openfecli/tests/commands/test_gather.py @@ -28,7 +28,8 @@ def test_get_column(val, col): @pytest.fixture -def results_dir(tmpdir): +def results_dir_serial(tmpdir): + """Example output data, with replicates run in serial (3 replicates per results JSON).""" with tmpdir.as_cwd(): with resources.files('openfecli.tests.data') as d: t = tarfile.open(d / 'rbfe_results.tar.gz', mode='r') @@ -36,6 +37,16 @@ def results_dir(tmpdir): yield +@pytest.fixture +def results_dir_parallel(tmpdir): + """Identical data to results_dir_serial(), with replicates run in parallel (1 replicate per results JSON).""" + with tmpdir.as_cwd(): + with resources.files('openfecli.tests.data') as d: + t = tarfile.open(d / 'results_parallel.tar.gz', mode='r') + t.extractall('.') + + yield + _EXPECTED_DG = b""" ligand DG(MLE) (kcal/mol) uncertainty (kcal/mol) lig_ejm_31 -0.09 0.05 @@ -146,7 +157,7 @@ def results_dir(tmpdir): @pytest.mark.parametrize('report', ["", "dg", "ddg", "raw"]) -def test_gather(results_dir, report): +def test_gather(results_dir_serial, report): expected = { "": _EXPECTED_DG, "dg": _EXPECTED_DG, @@ -185,7 +196,7 @@ def test_generate_bad_legs_error_message(include): @pytest.mark.xfail -def test_missing_leg_error(results_dir): +def test_missing_leg_error(results_dir_serial): file_to_remove = "easy_rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json" (pathlib.Path("results") / file_to_remove).unlink() @@ -199,7 +210,7 @@ def test_missing_leg_error(results_dir): @pytest.mark.xfail -def test_missing_leg_allow_partial(results_dir): +def test_missing_leg_allow_partial(results_dir_serial): file_to_remove = "easy_rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json" (pathlib.Path("results") / file_to_remove).unlink() diff --git a/openfecli/tests/commands/test_quickrun.py b/openfecli/tests/commands/test_quickrun.py index d78b82827..b82d281e5 100644 --- a/openfecli/tests/commands/test_quickrun.py +++ b/openfecli/tests/commands/test_quickrun.py @@ -54,7 +54,14 @@ def test_quickrun_output_file_exists(json_file): assert result.exit_code == 2 # usage error assert "File 'foo.json' already exists." in result.output - +def test_quickrun_output_file_in_nonexistent_directory(json_file): + """Should catch invalid filepaths up front.""" + runner = CliRunner() + outfile = "not_dir/foo.json" + result = runner.invoke(quickrun, [json_file, '-o', outfile]) + assert result.exit_code == 2 + assert "Cannot write" in result.output + def test_quickrun_unit_error(): with resources.files('openfecli.tests.data') as d: json_file = str(d / 'bad_transformation.json') diff --git a/openfecli/tests/data/restructure_results_data.ipynb b/openfecli/tests/data/restructure_results_data.ipynb new file mode 100644 index 000000000..92aa67d22 --- /dev/null +++ b/openfecli/tests/data/restructure_results_data.ipynb @@ -0,0 +1,245 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "8d1899bc-337a-4024-9fa3-9cfbc452e091", + "metadata": {}, + "outputs": [], + "source": [ + "import json \n", + "from gufe.tokenization import JSON_HANDLER\n", + "import numpy as np\n", + "import os \n", + "import shutil\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "markdown", + "id": "a82b8123-521a-4ca3-a2cf-f73b6504fa14", + "metadata": {}, + "source": [ + "for this dataset, we know we have 3 replicates run in serial for each leg. We want to manipulate the data so that it is equivalent to the output if we re-ran this dataset with each leg run in parallel, with the following directory structure:\n", + "\n", + "```\n", + "results/\n", + " transformations_0/\n", + " rbfe_lig_ejm_31_complex_lig_ejm_42_complex/\n", + " shared_[hashA]_attempt_0/\n", + " rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json\n", + " transformations_1/\n", + " rbfe_lig_ejm_31_complex_lig_ejm_42_complex/\n", + " shared_[hashB]_attempt_0/\n", + " rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json\n", + " transformations_2/\n", + " rbfe_lig_ejm_31_complex_lig_ejm_42_complex/\n", + " shared_[hashC]_attempt_0/\n", + " rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c6ed7fe-b42c-4781-b356-85799e25356f", + "metadata": {}, + "outputs": [], + "source": [ + "def load_json(fpath):\n", + " return json.load(open(fpath, 'r'), cls=JSON_HANDLER.decoder)\n", + "\n", + "def dump_json(data, fpath):\n", + " with open(fpath, \"w\") as f:\n", + " json.dump(data, f, cls=JSON_HANDLER.encoder)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8eba246a-6123-4d8e-8fd8-2de516fbf881", + "metadata": {}, + "outputs": [], + "source": [ + "orig_dir = Path(\"results/\")\n", + "new_dir = Path(\"results_parallel/\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab4f2587-9b15-422d-9faa-e11ff98fd491", + "metadata": {}, + "outputs": [], + "source": [ + "leg_names = []\n", + "for name in os.listdir(orig_dir):\n", + " if name.endswith(\".json\"):\n", + " continue\n", + " leg_names.append(name)\n", + "leg_names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "311a7f0e-9c91-47ae-9e09-0e1bef03aca8", + "metadata": {}, + "outputs": [], + "source": [ + "! rm -rf $new_dir\n", + "for leg in leg_names:\n", + " json_data = load_json(orig_dir/f\"{leg}.json\")\n", + " srckey_to_protocol = {}\n", + " srckey_to_unit_results = {}\n", + " srckey_to_estimate = {}\n", + " ## collect results on a per-replicate basis\n", + " for k in json_data['protocol_result']['data']: \n", + " rep_source_key = json_data['protocol_result']['data'][k][0]['source_key']\n", + " \n", + " # keep only the data for this replicate\n", + " rep_result = json_data['protocol_result'].copy()\n", + " rep_result['data']={k:json_data['protocol_result']['data'][k]}\n", + " srckey_to_protocol[rep_source_key] = rep_result\n", + "\n", + " # pull just the estimate value so we can put it at the top of the output\n", + " srckey_to_estimate[rep_source_key] = rep_result['data'][k][0]['outputs']['unit_estimate']\n", + " \n", + " for k in json_data['unit_results']:\n", + " rep_source_key = json_data['unit_results'][k]['source_key']\n", + "\n", + " rep_unit_result = json_data['unit_results'].copy()\n", + " rep_unit_result = {k: json_data['unit_results'][k]}\n", + " srckey_to_unit_results[rep_source_key] = rep_unit_result\n", + " \n", + " assert srckey_to_protocol.keys() == srckey_to_unit_results.keys()\n", + " \n", + " ## write to the new directory\n", + " for n, sk in enumerate(sorted(srckey_to_protocol.keys())):\n", + " rep_dir = new_dir/f\"replicate_{n}\"\n", + " os.makedirs(rep_dir/leg)\n", + " \n", + " # build up the data for this replicate\n", + " replicate_data = {'estimate': srckey_to_estimate[sk],\n", + " 'uncertainty': np.std(srckey_to_estimate[sk]),\n", + " 'protocol_result': srckey_to_protocol[sk],\n", + " 'unit_results': srckey_to_unit_results[sk]}\n", + " \n", + " # write!\n", + " dump_json(replicate_data, rep_dir/f\"{leg}.json\")\n", + " working_dir_name = f\"shared_{sk}_attempt_0\"\n", + " ## TODO: make this work for arbitrary number of attempts \n", + " # os.symlink(orig_dir/leg/working_dir_name, rep_dir/leg/working_dir_name)\n", + " shutil.copytree(orig_dir/leg/working_dir_name, rep_dir/leg/working_dir_name)\n" + ] + }, + { + "cell_type": "markdown", + "id": "f864dcb3-bebf-425b-9154-bffc2b0e3f07", + "metadata": {}, + "source": [ + "## check that objects reload correctly" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c20639c-8ba7-457a-bf8a-76c64aef4a38", + "metadata": {}, + "outputs": [], + "source": [ + "import openfe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9cba8316-5500-4d5e-a84d-d72d09ba2a42", + "metadata": {}, + "outputs": [], + "source": [ + "json_reloaded = load_json(\"results_parallel/replicate_0/easy_rbfe_lig_ejm_31_solvent_lig_ejm_47_solvent.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0e90b45-ae83-41c1-8748-0a8c1466b378", + "metadata": {}, + "outputs": [], + "source": [ + "json_reloaded['estimate'], json_reloaded['uncertainty']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0ce2bc6-d960-4521-b71c-316be0557e9d", + "metadata": {}, + "outputs": [], + "source": [ + "pr_reloaded = openfe.ProtocolResult.from_dict(json_reloaded['protocol_result'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2fbb695-d4ef-45bd-af53-2ef9d0bc8e0a", + "metadata": {}, + "outputs": [], + "source": [ + "pr_reloaded.data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19662eaa-46de-4eb0-8c78-ddd6c68b12db", + "metadata": {}, + "outputs": [], + "source": [ + "first_pur_key = list(json_reloaded['unit_results'].keys())[0]\n", + "pur_reloaded = openfe.ProtocolUnit.from_dict(json_reloaded['unit_results'][first_pur_key])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0154fda2-4c1a-4064-8bcc-03aeecf13365", + "metadata": {}, + "outputs": [], + "source": [ + "pur_reloaded" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f2bbc84-f59c-40b9-a176-9a733ff275c1", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openfecli/tests/data/results_parallel.tar.gz b/openfecli/tests/data/results_parallel.tar.gz new file mode 100644 index 000000000..fd033bc17 Binary files /dev/null and b/openfecli/tests/data/results_parallel.tar.gz differ