From 9e61c22fc1e7dad54ae3a0b9bfff35d05901c49c Mon Sep 17 00:00:00 2001 From: Ivy Zhang <35546250+zhang-ivy@users.noreply.github.com> Date: Fri, 24 Jun 2022 14:32:56 -0400 Subject: [PATCH] Fix protein mutation repex consistency tests (#1054) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Iván Pulido --- .github/workflows/self-hosted-gpu-test.yml | 2 +- perses/tests/test_repex.py | 58 +++++++++++----------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/.github/workflows/self-hosted-gpu-test.yml b/.github/workflows/self-hosted-gpu-test.yml index 9d9df737c..3611b095b 100644 --- a/.github/workflows/self-hosted-gpu-test.yml +++ b/.github/workflows/self-hosted-gpu-test.yml @@ -102,7 +102,7 @@ jobs: shell: bash -l {0} run: | export TRAVIS=true - pytest -v --cov-report xml --cov=perses --durations=0 -a "not advanced" -m "gpu_ci or gpu_needed" -k "not test_RESTCapableHybridTopologyFactory_repex_neutral_mutation and not test_RESTCapableHybridTopologyFactory_repex_charge_mutation" perses/tests + pytest -v --cov-report xml --cov=perses --durations=0 -a "not advanced" -m "gpu_ci or gpu_needed" perses/tests - name: Codecov if: ${{ github.repository == 'choderalab/perses' diff --git a/perses/tests/test_repex.py b/perses/tests/test_repex.py index b87f70e70..9b1466f51 100644 --- a/perses/tests/test_repex.py +++ b/perses/tests/test_repex.py @@ -2,16 +2,14 @@ import pytest from perses.tests.utils import enter_temp_directory -# Tolerance for energy differences tests -ENERGY_DIFF_TOLERANCE_KT = 0.5 - - @pytest.mark.gpu_needed def test_RESTCapableHybridTopologyFactory_repex_neutral_mutation(): """ Run ALA->THR and THR->ALA repex with the RESTCapableHybridTopologyFactory and make sure that the free energies are equal and opposite. + Note: We are using 50 steps per iteration here to speed up the test. We expect larger DDGs and dDDGs as as result. + """ from pkg_resources import resource_filename @@ -28,13 +26,13 @@ def test_RESTCapableHybridTopologyFactory_repex_neutral_mutation(): platform = configure_platform(utils.get_fastest_platform().getName()) data = {} - n_iterations = 250 + n_iterations = 3000 mutations = [('ala', 'thr'), ('thr', 'ala')] with enter_temp_directory() as temp_dir: for wt_name, mutant_name in mutations: # Generate htf pdb_filename = resource_filename("perses", f"data/{wt_name}_vacuum.pdb") - solvent_delivery = PointMutationExecutor( + solvent_delivery = PointMutationExecutor( # TODO: Need to be specify larger padding (1.7 nm) to work with openmm >= 7.8 pdb_filename, "1", "2", @@ -54,12 +52,12 @@ def test_RESTCapableHybridTopologyFactory_repex_neutral_mutation(): nonbonded_force.setUseDispersionCorrection(True) # Set up repex simulation - reporter_file = os.path.join(temp_dir, f"{wt_name}-{mutant_name}") + reporter_file = os.path.join(temp_dir, f"{wt_name}-{mutant_name}.nc") reporter = MultiStateReporter(reporter_file, checkpoint_interval=10) hss = HybridRepexSampler(mcmc_moves=mcmc.LangevinSplittingDynamicsMove(timestep=4.0 * unit.femtoseconds, collision_rate=1.0 / unit.picosecond, - n_steps=250, - reassign_velocities=True, + n_steps=50, + reassign_velocities=False, n_restart_attempts=20, splitting="V R R R O R R R V", constraint_tolerance=1e-06), @@ -67,7 +65,7 @@ def test_RESTCapableHybridTopologyFactory_repex_neutral_mutation(): hybrid_factory=htf, online_analysis_interval=None) hss.setup(n_states=12, temperature=300 * unit.kelvin, t_max=300 * unit.kelvin, - storage_file=reporter, endstates=True) + storage_file=reporter, minimisation_steps=0, endstates=True) hss.energy_context_cache = cache.ContextCache(capacity=None, time_to_live=None, platform=platform) hss.sampler_context_cache = cache.ContextCache(capacity=None, time_to_live=None, platform=platform) @@ -81,12 +79,12 @@ def test_RESTCapableHybridTopologyFactory_repex_neutral_mutation(): f_ij, df_ij = analyzer.get_free_energy() data[f"{wt_name}-{mutant_name}"] = {'free_energy': f_ij[0, -1], 'error': df_ij[0, -1]} - forward_data = data['ala-thr']['free_energy'] - reverse_data = data['thr-ala']['free_energy'] * -1 - assert np.isclose([forward_data], [reverse_data], atol=ENERGY_DIFF_TOLERANCE_KT), \ - f"ALA-THR is {forward_data}. THR-ALA is {reverse_data}." + DDG = abs(data['ala-thr']['free_energy'] - data['thr-ala']['free_energy'] * -1) + dDDG = np.sqrt(data['ala-thr']['error'] ** 2 + data['thr-ala']['error'] ** 2) + assert DDG < 6 * dDDG, f"DDG ({DDG}) is greater than 6 * dDDG ({6 * dDDG})" +@pytest.mark.skip(reason="Currently taking too long in CI.") @pytest.mark.gpu_needed def test_RESTCapableHybridTopologyFactory_repex_charge_mutation(): """ @@ -101,6 +99,9 @@ def test_RESTCapableHybridTopologyFactory_repex_charge_mutation(): the use of solvated PDBs as input (vs the neutral mutation test uses vacuum PDBs and requires the PointMutationExecutor to solvate). This difference is because the ARG and LYS dipeptide PDBs were generated using the geometry engine and were therefore clashy, so we needed to run equilibration before using them as inputs. The ALA and THR PDBs were not clashy. + + Note: We are using 50 steps per iteration here to speed up the test. We expect larger DDGs and dDDGs as as result. + """ import tempfile @@ -118,7 +119,7 @@ def test_RESTCapableHybridTopologyFactory_repex_charge_mutation(): platform = configure_platform(utils.get_fastest_platform().getName()) data = {} - n_iterations = 2000 + n_iterations = 3000 d_mutations = {'forward': [('arg', 'ala'), ('lys', 'ala')], 'reverse': [('ala', 'arg'), ('ala', 'lys')]} with enter_temp_directory() as temp_dir: @@ -126,7 +127,7 @@ def test_RESTCapableHybridTopologyFactory_repex_charge_mutation(): for wt_name, mutant_name in mutations: # Generate htf pdb_filename = resource_filename("perses", f"data/{wt_name}_solvated.cif") if mutation_type == 'forward' else os.path.join(temp_dir, f"{wt_name}.cif") - solvent_delivery = PointMutationExecutor( + solvent_delivery = PointMutationExecutor( # TODO: Need to be specify larger padding (1.7 nm) to work with openmm >= 7.8 pdb_filename, "1", "2", @@ -138,7 +139,7 @@ def test_RESTCapableHybridTopologyFactory_repex_charge_mutation(): ) htf = solvent_delivery.get_apo_rest_htf() - # Save the new + # Save the new positions to use for the reverse transformation if mutation_type == 'forward': app.PDBxFile.writeFile(htf._topology_proposal.new_topology, htf.new_positions(htf.hybrid_positions), @@ -158,16 +159,16 @@ def test_RESTCapableHybridTopologyFactory_repex_charge_mutation(): reporter = MultiStateReporter(reporter_file, checkpoint_interval=10) hss = HybridRepexSampler(mcmc_moves=mcmc.LangevinSplittingDynamicsMove(timestep=4.0 * unit.femtoseconds, collision_rate=1.0 / unit.picosecond, - n_steps=250, - reassign_velocities=True, + n_steps=50, + reassign_velocities=False, n_restart_attempts=20, splitting="V R R R O R R R V", constraint_tolerance=1e-06), replica_mixing_scheme='swap-all', hybrid_factory=htf, online_analysis_interval=None) - hss.setup(n_states=12, temperature=300 * unit.kelvin, t_max=300 * unit.kelvin, - storage_file=reporter, endstates=True) + hss.setup(n_states=36, temperature=300 * unit.kelvin, t_max=300 * unit.kelvin, + storage_file=reporter, minimisation_steps=0, endstates=True) hss.energy_context_cache = cache.ContextCache(capacity=None, time_to_live=None, platform=platform) hss.sampler_context_cache = cache.ContextCache(capacity=None, time_to_live=None, platform=platform) @@ -181,12 +182,11 @@ def test_RESTCapableHybridTopologyFactory_repex_charge_mutation(): f_ij, df_ij = analyzer.get_free_energy() data[f"{wt_name}-{mutant_name}"] = {'free_energy': f_ij[0, -1], 'error': df_ij[0, -1]} - arg_ala_forward = data['arg-ala']['free_energy'] - arg_ala_reverse = data['ala-arg']['free_energy'] - lys_ala_forward = data['lys-ala']['free_energy'] - lys_ala_reverse = data['ala-lys']['free_energy'] - arg_ala_arg = arg_ala_forward + arg_ala_reverse - lys_ala_lys = lys_ala_forward + lys_ala_reverse + DDG = data['arg-ala']['free_energy'] + data['ala-arg']['free_energy'] \ + - (data['lys-ala']['free_energy'] + data['ala-lys']['free_energy']) + dDDG = np.sqrt(data['arg-ala']['error'] ** 2 + + data['ala-arg']['error'] ** 2 + + data['lys-ala']['error'] ** 2 + + data['ala-lys']['error'] ** 2) - assert np.isclose([arg_ala_arg], [lys_ala_lys], atol=ENERGY_DIFF_TOLERANCE_KT), \ - f"ARG-ALA-ARG is {arg_ala_arg}. LYS-ALA-LYS is {lys_ala_lys}." + assert DDG < 6 * dDDG, f"DDG ({DDG}) is greater than 6 * dDDG ({6 * dDDG})"