From 3d1dfe6355c50b4d21983b29fbe19a7eb99f5b4d Mon Sep 17 00:00:00 2001 From: Yutong Zhao Date: Thu, 19 Sep 2024 15:22:26 -0400 Subject: [PATCH] Rebased --- examples/validate_solvent_perturbations.py | 163 ++++++++ tests/nonbonded/test_smoothcore_potential.py | 225 ++++++++++ timemachine/cpp/CMakeLists.txt | 1 + timemachine/cpp/src/kernels/k_nonbonded.cuh | 134 ++++++ .../smoothcore_nonbonded_interaction_group.cu | 393 ++++++++++++++++++ ...smoothcore_nonbonded_interaction_group.hpp | 103 +++++ timemachine/cpp/src/wrap_kernels.cpp | 93 +++++ timemachine/fe/free_energy.py | 4 + timemachine/fe/rbfe.py | 3 +- timemachine/fe/single_topology.py | 278 +++++++++---- timemachine/fe/topology.py | 65 ++- timemachine/ff/handlers/nonbonded.py | 1 + timemachine/lib/custom_ops.pyi | 8 + timemachine/potentials/__init__.py | 2 + timemachine/potentials/nonbonded.py | 155 ++++++- timemachine/potentials/potentials.py | 26 +- timemachine/testsystems/data/benzyl.sdf | 274 ++++++++++++ 17 files changed, 1844 insertions(+), 84 deletions(-) create mode 100644 examples/validate_solvent_perturbations.py create mode 100644 tests/nonbonded/test_smoothcore_potential.py create mode 100644 timemachine/cpp/src/smoothcore_nonbonded_interaction_group.cu create mode 100644 timemachine/cpp/src/smoothcore_nonbonded_interaction_group.hpp create mode 100644 timemachine/testsystems/data/benzyl.sdf diff --git a/examples/validate_solvent_perturbations.py b/examples/validate_solvent_perturbations.py new file mode 100644 index 0000000000..8c38be38ec --- /dev/null +++ b/examples/validate_solvent_perturbations.py @@ -0,0 +1,163 @@ +# usage: +# python examples/validate_solvent_perturbations.py --ligands timemachine/testsystems/data/benzyl.sdf --mol_a_name benzene --mol_b_name benzene_bicycle --n_frames 200 --steps_per_frame 200 --n_eq_steps 5000 --seed 2024 + +# 1) using smoothcores +# 2) not-using smoothcores + +# bisection only, (main goal is to check # of windows emitted in the fast bisection stage) +import argparse +import copy +import pickle + +import numpy as np + +from timemachine.constants import DEFAULT_ATOM_MAPPING_KWARGS +from timemachine.fe import atom_mapping +from timemachine.fe.free_energy import MDParams +from timemachine.fe.rbfe import HostConfig, estimate_relative_free_energy_bisection +from timemachine.fe.utils import plot_atom_mapping_grid, read_sdf +from timemachine.ff import Forcefield +from timemachine.md import builders + + +def get_mol_by_name(mols, name): + for m in mols: + if m.GetProp("_Name") == name: + return m + + assert 0, "Mol not found" + + +def run_solvent(mol_a, mol_b, core, forcefield, md_params, use_smoothcore): + box_width = 4.0 + solvent_sys, solvent_conf, solvent_box, solvent_top = builders.build_water_system(box_width, forcefield.water_ff) + solvent_box += np.diag([0.1, 0.1, 0.1]) # remove any possible clashes + solvent_host_config = HostConfig(solvent_sys, solvent_conf, solvent_box, len(solvent_conf)) + solvent_res = estimate_relative_free_energy_bisection( + mol_a, + mol_b, + core, + forcefield, + solvent_host_config, + md_params=md_params, + prefix="solvent", + min_overlap=0.667, + use_smoothcore=use_smoothcore, + ) + + print("==statistics==") + print("lambda_schedule", [x.lamb for x in solvent_res.final_result.initial_states]) + print("dGs", solvent_res.final_result.dGs) + print("num_windows", len(solvent_res.final_result.dGs) + 1) + print("sum(dGs)", np.sum(solvent_res.final_result.dGs)) + print("norm(dG_errs)", np.linalg.norm(solvent_res.final_result.dG_errs)) + + mol_a_name = mol_a.GetProp("_Name") + mol_b_name = mol_b.GetProp("_Name") + + with open(f"solvent_summary_overlap_{mol_a_name}_to_{mol_b_name}_{use_smoothcore}.png", "wb") as fh: + fh.write(solvent_res.plots.overlap_summary_png) + + with open(f"solvent_detail_overlap_{mol_a_name}_to_{mol_b_name}_{use_smoothcore}.png", "wb") as fh: + fh.write(solvent_res.plots.overlap_detail_png) + + with open(f"solvent_debug_{mol_a_name}_to_{mol_b_name}_{use_smoothcore}.pkl", "wb") as f: + pickle.dump(solvent_res, f) + + +def read_from_args(): + parser = argparse.ArgumentParser(description="Estimate relative solvation free energy between two ligands") + parser.add_argument( + "--n_frames", type=int, help="number of frames to use for the free energy estimate", required=True + ) + parser.add_argument("--ligands", type=str, help="SDF file containing the ligands of interest", required=True) + parser.add_argument("--mol_a_name", type=str, help="name of the start molecule", required=True) + parser.add_argument("--mol_b_name", type=str, help="name of the end molecule", required=True) + parser.add_argument("--steps_per_frame", type=int, help="steps per frame", required=True) + parser.add_argument("--n_eq_steps", type=int, help="n eq steps", required=True) + parser.add_argument("--seed", type=int, help="Random number seed", required=True) + + args = parser.parse_args() + mols = read_sdf(str(args.ligands)) + mol_a = get_mol_by_name(mols, args.mol_a_name) + mol_b = get_mol_by_name(mols, args.mol_b_name) + + md_params = MDParams( + n_frames=args.n_frames, n_eq_steps=args.n_eq_steps, steps_per_frame=args.steps_per_frame, seed=args.seed + ) + + ff = Forcefield.load_default() + + atom_mapping_kwargs = copy.copy(DEFAULT_ATOM_MAPPING_KWARGS) + atom_mapping_kwargs["ring_matches_ring_only"] = False + + all_cores = atom_mapping.get_cores(mol_a, mol_b, **atom_mapping_kwargs) + + core = all_cores[0] + + # Hydrogen Only + core = np.array([[8, 8]]) + + res = plot_atom_mapping_grid(mol_a, mol_b, core) + fpath = f"atom_mapping_{args.mol_a_name}_to_{args.mol_b_name}.svg" + print("core mapping written to", fpath) + with open(fpath, "w") as fh: + fh.write(res) + + # debug + from timemachine.fe.single_topology import SingleTopology + + st = SingleTopology(mol_a, mol_b, core, ff, use_smoothcore=True) + all_charges = [] + all_eps = [] + all_ws = [] + all_mols = [] + for lamb in np.linspace(0, 1, 10): + guest_ixn_env_params = st._get_smoothcore_guest_params(ff.q_handle, ff.lj_handle, lamb) + all_charges.append(guest_ixn_env_params[:, 0]) + all_eps.append(guest_ixn_env_params[:, 2]) + all_ws.append(guest_ixn_env_params[:, 3]) + all_mols.append(st.mol(lamb)) + all_charges = np.array(all_charges) + all_eps = np.array(all_eps) + all_ws = np.array(all_ws) + num_atoms = all_charges.shape[1] + dummy_a_atoms = st.dummy_a_idxs() + dummy_b_atoms = st.dummy_b_idxs() + import matplotlib.pyplot as plt + + f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5)) + for atom_idx in range(num_atoms): + symbol = all_mols[0].GetAtomWithIdx(atom_idx).GetSymbol() + if atom_idx in dummy_a_atoms: + linestyle = "dashed" + elif atom_idx in dummy_b_atoms: + linestyle = "dotted" + else: + linestyle = "solid" + ax1.plot(all_charges[:, atom_idx], label=f"{symbol}{atom_idx}", linestyle=linestyle) + ax2.plot(all_ws[:, atom_idx], label=f"{symbol}{atom_idx}", linestyle=linestyle) + ax3.plot(all_eps[:, atom_idx], label=f"{symbol}{atom_idx}", linestyle=linestyle) + ax1.set_ylabel("charge") + ax1.set_xlabel("lambda_idx") + + ax2.set_ylabel("w") + ax2.set_xlabel("lambda_idx") + + ax3.set_ylabel("eps") + ax3.set_xlabel("lambda_idx") + + ax3.legend() + mol_a_name = mol_a.GetProp("_Name") + mol_b_name = mol_b.GetProp("_Name") + plt.tight_layout() + plt.savefig(f"solvent_parameter_interpolation_{mol_a_name}_to_{mol_b_name}.png") + # assert 0 + # debug + + run_solvent(mol_a, mol_b, core, ff, md_params, use_smoothcore=True) + run_solvent(mol_a, mol_b, core, ff, md_params, use_smoothcore=False) + + +if __name__ == "__main__": + read_from_args() diff --git a/tests/nonbonded/test_smoothcore_potential.py b/tests/nonbonded/test_smoothcore_potential.py new file mode 100644 index 0000000000..7324140177 --- /dev/null +++ b/tests/nonbonded/test_smoothcore_potential.py @@ -0,0 +1,225 @@ +import numpy as np +from common import GradientTest + +from timemachine.fe.free_energy import HostConfig +from timemachine.fe.single_topology import AtomMapFlags, AtomMapMixin, SingleTopology +from timemachine.fe.system import convert_bps_into_system +from timemachine.fe.utils import get_romol_conf +from timemachine.ff import Forcefield +from timemachine.ff.handlers import openmm_deserializer +from timemachine.md.builders import build_protein_system +from timemachine.md.minimizer import pre_equilibrate_host +from timemachine.potentials import SmoothcoreNonbondedInteractionGroup +from timemachine.potentials.nonbonded import ( + MIN_SMOOTHCORE_EPS_SQRT, + MIN_SMOOTHCORE_SIG_HALF, + reference_nonbonded_interaction_group, + smoothcore_charge_interpolation, + smoothcore_lj_interpolation, +) +from timemachine.testsystems.relative import get_hif2a_ligand_pair_single_topology + + +def get_hif2a_protein_coords_box_params(mol_a, mol_b, ff, minimize=False): + solvated_host_system, solvated_host_coords, box, topology, num_water_atoms = build_protein_system( + "timemachine/testsystems/data/hif2a_nowater_min.pdb", ff.protein_ff, ff.water_ff, [mol_a, mol_b] + ) + + host_bps, _ = openmm_deserializer.deserialize_system(solvated_host_system, cutoff=1.2) + + host_system = convert_bps_into_system(host_bps) + + if minimize: + hc = HostConfig(solvated_host_system, solvated_host_coords, box, num_water_atoms) + equil_coords, equil_box = pre_equilibrate_host([mol_a, mol_b], hc, ff) + assert equil_coords.shape == solvated_host_coords.shape + return equil_coords, equil_box, host_bps[-1].params, host_system, num_water_atoms + else: + return solvated_host_coords, box, host_bps[-1].params, host_system, num_water_atoms + + +def test_smoothcore_charge_interpolation(): + """ + Test that: + 1) charge interpolation along the schedule maintains net-neutral charges + 2) end-states are consistent with the interpolate lambda values of 0 and 1 + """ + mol_a, mol_b, core = get_hif2a_ligand_pair_single_topology() + + ff = Forcefield.load_default() + + mol_a_charges = ff.q_handle.parameterize(mol_a) + mol_b_charges = ff.q_handle.parameterize(mol_b) + amm = AtomMapMixin(mol_a, mol_b, core) + + charges_src = np.zeros(amm.get_num_atoms()) + charges_dst = np.zeros(amm.get_num_atoms()) + + mol_a_idxs = amm.mol_a_idxs() + mol_b_idxs = amm.mol_b_idxs() + + charges_src[mol_a_idxs] = mol_a_charges[np.array([amm.c_to_a[x] for x in mol_a_idxs])] + charges_dst[mol_b_idxs] = mol_b_charges[np.array([amm.c_to_b[x] for x in mol_b_idxs])] + + # test that net charges are zero through-out the schedule. + for lamb in np.linspace(0, 1.0, 20): + charges = smoothcore_charge_interpolation(lamb, charges_src, charges_dst, mol_a_idxs, mol_b_idxs) + np.testing.assert_almost_equal(np.sum(charges), 0, decimal=5) + + charges_lhs = smoothcore_charge_interpolation(0.0, charges_src, charges_dst, mol_a_idxs, mol_b_idxs) + charges_rhs = smoothcore_charge_interpolation(1.0, charges_src, charges_dst, mol_a_idxs, mol_b_idxs) + + np.testing.assert_array_equal(charges_lhs, charges_src) + np.testing.assert_array_equal(charges_rhs, charges_dst) + + +def test_smoothcore_lj_interpolation(): + """ + Test that the lj interpolation has the correct truncation treatment (upon absorption into + the anchoring atoms). + """ + mol_a, mol_b, core = get_hif2a_ligand_pair_single_topology() + + ff = Forcefield.load_default() + + mol_a_lj = ff.lj_handle.parameterize(mol_a) + mol_b_lj = ff.lj_handle.parameterize(mol_b) + amm = AtomMapMixin(mol_a, mol_b, core) + + lj_src = np.zeros((amm.get_num_atoms(), 2)) + lj_dst = np.zeros((amm.get_num_atoms(), 2)) + + mol_a_idxs = amm.mol_a_idxs() + mol_b_idxs = amm.mol_b_idxs() + + lj_src[mol_a_idxs] = mol_a_lj[np.array([amm.c_to_a[x] for x in mol_a_idxs])] + lj_dst[mol_b_idxs] = mol_b_lj[np.array([amm.c_to_b[x] for x in mol_b_idxs])] + + lj_lhs = smoothcore_lj_interpolation(0.0, lj_src, lj_dst, mol_a_idxs, mol_b_idxs) + lj_rhs = smoothcore_lj_interpolation(1.0, lj_src, lj_dst, mol_a_idxs, mol_b_idxs) + + np.testing.assert_array_equal(lj_lhs, lj_src) + np.testing.assert_array_equal(lj_rhs, lj_dst) + assert np.all(lj_lhs[amm.dummy_b_idxs(), 0] == 0) + assert np.all(lj_lhs[amm.dummy_b_idxs(), 1] == 0) + assert np.all(lj_rhs[amm.dummy_a_idxs(), 0] == 0) + assert np.all(lj_rhs[amm.dummy_a_idxs(), 1] == 0) + + lj_almost_lhs = smoothcore_lj_interpolation(0.0001, lj_src, lj_dst, mol_a_idxs, mol_b_idxs) + assert np.all(lj_almost_lhs[amm.dummy_b_idxs(), 0] >= MIN_SMOOTHCORE_SIG_HALF) + assert np.all(lj_almost_lhs[amm.dummy_b_idxs(), 1] >= MIN_SMOOTHCORE_EPS_SQRT) + + lj_almost_rhs = smoothcore_lj_interpolation(0.9999, lj_src, lj_dst, mol_a_idxs, mol_b_idxs) + assert np.all(lj_almost_rhs[amm.dummy_a_idxs(), 0] >= MIN_SMOOTHCORE_SIG_HALF) + assert np.all(lj_almost_rhs[amm.dummy_a_idxs(), 1] >= MIN_SMOOTHCORE_EPS_SQRT) + + +def test_smoothcore_ligand_anchor_idxs(): + mol_a, mol_b, core = get_hif2a_ligand_pair_single_topology() + ff = Forcefield.load_default() + st = SingleTopology(mol_a, mol_b, core, ff) + + ligand_anchor_idxs = np.zeros(st.get_num_atoms(), dtype=np.int32) + for c_idx, c_flag in enumerate(st.c_flags): + if c_flag == AtomMapFlags.CORE: + ligand_anchor_idxs[c_idx] = c_idx + + st = SingleTopology(mol_a, mol_b, core, ff) + + # anchors implied by the atom-mapping, manually generated by means of eye-balling + ligand_anchor_idxs[st.a_to_c[22]] = st.a_to_c[0] + ligand_anchor_idxs[st.a_to_c[24]] = st.a_to_c[0] + ligand_anchor_idxs[st.a_to_c[25]] = st.a_to_c[0] + ligand_anchor_idxs[st.a_to_c[33]] = st.a_to_c[0] + ligand_anchor_idxs[st.b_to_c[23]] = st.b_to_c[22] + + test_ligand_anchor_idxs = st.get_smoothcore_ligand_anchor_idxs() + + np.testing.assert_array_equal(test_ligand_anchor_idxs, ligand_anchor_idxs) + + +def test_smoothcore_potential(): + """ + Test that the smooth core potential on the minimized hif2a system. + + Verifies: + + 1) Agreement with reference jax implementation + 2) Agreement with existing NonbondedInteractionGroup at the end-states + + """ + + mol_a, mol_b, core = get_hif2a_ligand_pair_single_topology() + x_a = get_romol_conf(mol_a) + x_b = get_romol_conf(mol_b) + + ff = Forcefield.load_default() + host_conf, box, host_params, host_system, num_water_atoms = get_hif2a_protein_coords_box_params( + mol_a, mol_b, ff, minimize=True + ) + + ref_beta = host_system.nonbonded.potential.beta + ref_cutoff = host_system.nonbonded.potential.cutoff + + num_host_atoms = len(host_conf) + + st = SingleTopology(mol_a, mol_b, core, ff, use_smoothcore=True) + row_atom_idxs = np.arange(st.get_num_atoms()) + num_host_atoms + col_atom_idxs = np.arange(num_host_atoms) + + ligand_anchor_idxs = st.get_smoothcore_ligand_anchor_idxs() + ligand_anchor_idxs += num_host_atoms + anchor_idxs = np.concatenate([np.arange(num_host_atoms), ligand_anchor_idxs]) + + xs = [] + test_nrgs = [] + test_params = [] + + total_num_atoms = num_host_atoms + st.get_num_atoms() + ref_pot = SmoothcoreNonbondedInteractionGroup( + total_num_atoms, + row_atom_idxs.astype(np.int32), + anchor_idxs.astype(np.int32), + ref_beta, + ref_cutoff, + col_atom_idxs.astype(np.int32), + ) + + for lamb in np.linspace(0, 1, 11): + ligand_params = st._get_smoothcore_guest_params(ff.q_handle, ff.lj_handle, lamb) + combined_params = np.concatenate([host_params, ligand_params]) + ligand_conf = st.combine_confs(x_a, x_b, lamb) + combined_conf = np.concatenate([host_conf, ligand_conf]) + test_params.append(combined_params) + ref_nrg = ref_pot(combined_conf, combined_params, box) + xs.append(combined_conf) + test_nrgs.append(ref_nrg) + + for precision, rtol, atol in [(np.float64, 1e-8, 1e-8), (np.float32, 5e-4, 5e-4)]: + test_potential, combined_params = st._parameterize_unbound_host_guest_nonbonded_ixn( + lamb, + host_system.nonbonded, + num_water_atoms, + ) + test_impl = test_potential.to_gpu(precision) + GradientTest().compare_forces(combined_conf, combined_params, box, ref_pot, test_impl, rtol=rtol, atol=atol) + + # test that reference gpu, reference cpu, and the smoothcore are consistent at the end-states + st = SingleTopology(mol_a, mol_b, core, ff, use_smoothcore=False) + hg_system_lhs = st.combine_with_host(host_system, 0.0, num_water_atoms) + ref_u_lhs_gpu = hg_system_lhs.nonbonded_host_guest_ixn(xs[0], box) + ref_params_lhs = hg_system_lhs.nonbonded_host_guest_ixn.params.reshape(-1, 4) + ref_u_lhs_cpu = reference_nonbonded_interaction_group( + xs[0], ref_params_lhs, box, row_atom_idxs, col_atom_idxs, beta=ref_beta, cutoff=ref_cutoff + ) + np.testing.assert_allclose(test_nrgs[0], ref_u_lhs_cpu) + np.testing.assert_allclose(ref_u_lhs_gpu, ref_u_lhs_cpu) + + hg_system_rhs = st.combine_with_host(host_system, 1.0, num_water_atoms) + ref_u_rhs_gpu = hg_system_rhs.nonbonded_host_guest_ixn(xs[-1], box) + ref_params_rhs = hg_system_rhs.nonbonded_host_guest_ixn.params.reshape(-1, 4) + ref_u_rhs_cpu = reference_nonbonded_interaction_group( + xs[-1], ref_params_rhs, box, row_atom_idxs, col_atom_idxs, beta=ref_beta, cutoff=ref_cutoff + ) + np.testing.assert_allclose(test_nrgs[-1], ref_u_rhs_cpu) + np.testing.assert_allclose(ref_u_rhs_gpu, ref_u_rhs_cpu) diff --git a/timemachine/cpp/CMakeLists.txt b/timemachine/cpp/CMakeLists.txt index 7a7d4145af..3219528a04 100644 --- a/timemachine/cpp/CMakeLists.txt +++ b/timemachine/cpp/CMakeLists.txt @@ -73,6 +73,7 @@ pybind11_add_module(${LIBRARY_NAME} SHARED NO_EXTRAS src/nonbonded_all_pairs.cu src/nonbonded_pair_list.cu src/nonbonded_interaction_group.cu + src/smoothcore_nonbonded_interaction_group.cu src/energy_accumulation.cu src/local_md_utils.cu src/neighborlist.cu diff --git a/timemachine/cpp/src/kernels/k_nonbonded.cuh b/timemachine/cpp/src/kernels/k_nonbonded.cuh index 5fe32a9a88..986e93dc17 100644 --- a/timemachine/cpp/src/kernels/k_nonbonded.cuh +++ b/timemachine/cpp/src/kernels/k_nonbonded.cuh @@ -54,6 +54,103 @@ void __global__ k_check_rebuild_coords_and_box_gather( } } +template +void __global__ k_smoothcore_rescale_coords( + const int N, + const unsigned int *__restrict__ row_atom_idxs, + const unsigned int *__restrict__ row_anchor_idxs, + const double *__restrict__ coords, + const double *__restrict__ params, + double *__restrict__ coords_out) { + static_assert(COORDS_DIM == 3); + static_assert(PARAMS_DIM == 4); + int atom_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (atom_idx >= N) { + return; + } + + unsigned int anchor_idx = row_anchor_idxs[atom_idx]; + RealType rescale = + static_cast(1.0) - static_cast(params[atom_idx * PARAMS_DIM + 3]); // old w_coordinates + + // Coords have 3 dimensions, params have 4 +#pragma unroll COORDS_DIM + for (int i = 0; i < COORDS_DIM; i++) { + RealType atom_pos = coords[atom_idx * COORDS_DIM + i]; + RealType anchor_pos = coords[anchor_idx * COORDS_DIM + i]; + coords_out[atom_idx * COORDS_DIM + i] = anchor_pos + rescale * (atom_pos - anchor_pos); + } +} + +template +void __global__ k_smoothcore_rescale_and_accum_du_dp( + const int N, + const unsigned int *__restrict__ anchor_idxs, + const double *__restrict__ coords, + const double *__restrict__ params, + const unsigned int *__restrict__ perm, + const unsigned long long *__restrict__ sorted_du_dp, + const unsigned long long *__restrict__ sorted_du_dx, + unsigned long long *__restrict__ du_dp) { + static_assert(COORDS_DIM == 3); + static_assert(PARAMS_DIM == 4); + int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= N) { + return; + } + + int atom_idx = perm[tid]; + unsigned int anchor_idx = anchor_idxs[atom_idx]; + RealType accum_w = 0; +#pragma unroll COORDS_DIM + for (int i = 0; i < COORDS_DIM; i++) { + RealType atom_pos = coords[atom_idx * COORDS_DIM + i]; + RealType anchor_pos = coords[anchor_idx * COORDS_DIM + i]; + RealType d_du_dx_real = FIXED_TO_FLOAT(sorted_du_dx[tid * COORDS_DIM + i]); + accum_w += -(atom_pos - anchor_pos) * d_du_dx_real; + } + + atomicAdd(du_dp + atom_idx * PARAMS_DIM + 0, sorted_du_dp[tid * PARAMS_DIM + 0]); + atomicAdd(du_dp + atom_idx * PARAMS_DIM + 1, sorted_du_dp[tid * PARAMS_DIM + 1]); + atomicAdd(du_dp + atom_idx * PARAMS_DIM + 2, sorted_du_dp[tid * PARAMS_DIM + 2]); + atomicAdd(du_dp + atom_idx * PARAMS_DIM + 3, FLOAT_TO_FIXED_DU_DP(accum_w)); +} + +template +void __global__ k_smoothcore_rescale_and_accum_du_dx( + const int N, + const unsigned int *__restrict__ anchor_idxs, + const double *__restrict__ coords, + const double *__restrict__ params, + const unsigned int *__restrict__ perm, + const unsigned long long *__restrict__ sorted_du_dx, + unsigned long long *__restrict__ du_dx_out) { + + static_assert(COORDS_DIM == 3); + static_assert(PARAMS_DIM == 4); + int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= N) { + return; + } + + int atom_idx = perm[tid]; + unsigned int anchor_idx = anchor_idxs[atom_idx]; +#pragma unroll COORDS_DIM + for (int i = 0; i < COORDS_DIM; i++) { + RealType atom_pos = coords[atom_idx * COORDS_DIM + i]; + RealType anchor_pos = coords[anchor_idx * COORDS_DIM + i]; + RealType du_dx_hat = FIXED_TO_FLOAT(sorted_du_dx[tid * COORDS_DIM + i]); + RealType w_i = params[atom_idx * PARAMS_DIM + 3]; + RealType dxhat_danchor = w_i; + RealType dxhat_datom = 1 - w_i; + RealType du_danchor = du_dx_hat * dxhat_danchor; + RealType du_datom = du_dx_hat * dxhat_datom; + + atomicAdd(du_dx_out + anchor_idx * COORDS_DIM + i, FLOAT_TO_FIXED_NONBONDED(du_danchor)); + atomicAdd(du_dx_out + atom_idx * COORDS_DIM + i, FLOAT_TO_FIXED_NONBONDED(du_datom)); + } +} + template void __global__ k_gather_coords_and_params( const int N, @@ -82,6 +179,38 @@ void __global__ k_gather_coords_and_params( } } +template +void __global__ k_smoothcore_gather_coords_and_params_zero_w( + const int N, + const unsigned int *__restrict__ idxs, + const RealType *__restrict__ coords, + const RealType *__restrict__ params, + RealType *__restrict__ gathered_coords, + RealType *__restrict__ gathered_params) { + static_assert(COORDS_DIM == 3); + static_assert(PARAMS_DIM == PARAMS_PER_ATOM); + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= N) { + return; + } + + const unsigned int atom_idx = idxs[idx]; + + // Coords have 3 dimensions, params have 4 +#pragma unroll COORDS_DIM + for (int i = 0; i < COORDS_DIM; i++) { + gathered_coords[idx * COORDS_DIM + i] = coords[atom_idx * COORDS_DIM + i]; + } +#pragma unroll PARAMS_DIM + for (int i = 0; i < PARAMS_DIM; i++) { + if (i == PARAMS_DIM - 1) { + gathered_params[idx * PARAMS_DIM + 3] = 0; // zero out w coords + } else { + gathered_params[idx * PARAMS_DIM + i] = params[atom_idx * PARAMS_DIM + i]; + } + } +} + template void __global__ k_scatter_accum( const int N, @@ -201,6 +330,7 @@ void __device__ v_nonbonded_unified( RealType d2ij = delta_x * delta_x + delta_y * delta_y + delta_z * delta_z; RealType delta_w; + // w coords seem messed up here - i.e. they're being double computed somehow if (ALCHEMICAL) { // (ytz): we are guaranteed that delta_w is zero if ALCHEMICAL == false delta_w = w_i - w_j; @@ -322,6 +452,10 @@ void __device__ v_nonbonded_unified( atomicAdd(du_dp + lj_param_idx_eps_j, g_epsj); atomicAdd(du_dp + w_param_idx_j, g_wj); } + + // if(atom_i_idx == 168 || atom_j_idx == 168) { + // printf("inner debug %d %d | %f %f %f %f\n", atom_i_idx, atom_j_idx, FIXED_TO_FLOAT_DU_DP(g_qi), FIXED_TO_FLOAT_DU_DP(g_qj), FIXED_TO_FLOAT_DU_DP(g_epsi), FIXED_TO_FLOAT_DU_DP(g_epsj)); + // } } } diff --git a/timemachine/cpp/src/smoothcore_nonbonded_interaction_group.cu b/timemachine/cpp/src/smoothcore_nonbonded_interaction_group.cu new file mode 100644 index 0000000000..5c6290c764 --- /dev/null +++ b/timemachine/cpp/src/smoothcore_nonbonded_interaction_group.cu @@ -0,0 +1,393 @@ +#include +#include +#include + +#include "device_buffer.hpp" +#include "energy_accumulation.hpp" +#include "fixed_point.hpp" +#include "gpu_utils.cuh" +#include "kernel_utils.cuh" +#include "kernels/k_indices.cuh" +#include "nonbonded_common.hpp" +#include "set_utils.hpp" +#include "smoothcore_nonbonded_interaction_group.hpp" + +#include "k_nonbonded.cuh" + +static const int STEPS_PER_SORT = 200; + +namespace timemachine { + +template +SmoothcoreNonbondedInteractionGroup::SmoothcoreNonbondedInteractionGroup( + const int N, + const std::vector &row_atom_idxs, + const std::vector &col_atom_idxs, + const std::vector &anchor_atom_idxs, + const double beta, + const double cutoff, + const bool disable_hilbert_sort, + const double nblist_padding) + : N_(N), NR_(row_atom_idxs.size()), NC_(col_atom_idxs.size()), + + kernel_ptrs_({// enumerate over every possible kernel combination + // Set threads to 1 if not computing energy to reduced unused shared memory + // U: Compute U + // X: Compute DU_DX + // P: Compute DU_DP + // U X P + &k_nonbonded_unified, + &k_nonbonded_unified, + &k_nonbonded_unified, + &k_nonbonded_unified, + &k_nonbonded_unified, + &k_nonbonded_unified, + &k_nonbonded_unified, + &k_nonbonded_unified}), + + beta_(beta), cutoff_(cutoff), steps_since_last_sort_(0), nblist_(N_), nblist_padding_(nblist_padding), + hilbert_sort_(nullptr), disable_hilbert_(disable_hilbert_sort) { + + this->validate_idxs(N_, row_atom_idxs, col_atom_idxs, false); + + cudaSafeMalloc(&d_col_atom_idxs_, N_ * sizeof(*d_col_atom_idxs_)); + cudaSafeMalloc(&d_row_atom_idxs_, N_ * sizeof(*d_row_atom_idxs_)); + cudaSafeMalloc(&d_anchor_idxs_, N_ * sizeof(*d_anchor_idxs_)); + cudaSafeMalloc(&d_u_buffer_, NONBONDED_KERNEL_BLOCKS * sizeof(*d_u_buffer_)); + + cudaSafeMalloc(&d_perm_, N_ * sizeof(*d_perm_)); + cudaSafeMalloc(&d_rescaled_x_, N_ * 3 * sizeof(*d_rescaled_x_)); + cudaSafeMalloc(&d_sorted_x_, N_ * 3 * sizeof(*d_sorted_x_)); + + cudaSafeMalloc(&d_sorted_p_, N_ * PARAMS_PER_ATOM * sizeof(*d_sorted_p_)); + cudaSafeMalloc(&d_sorted_du_dx_, N_ * 3 * sizeof(*d_sorted_du_dx_)); + cudaSafeMalloc(&d_sorted_du_dp_, N_ * PARAMS_PER_ATOM * sizeof(*d_sorted_du_dp_)); + + cudaSafeMalloc(&d_nblist_x_, N_ * 3 * sizeof(*d_nblist_x_)); + gpuErrchk(cudaMemset(d_nblist_x_, 0, N_ * 3 * sizeof(*d_nblist_x_))); // set non-sensical positions + cudaSafeMalloc(&d_nblist_box_, 3 * 3 * sizeof(*d_nblist_box_)); + gpuErrchk(cudaMemset(d_nblist_box_, 0, 3 * 3 * sizeof(*d_nblist_box_))); + cudaSafeMalloc(&d_rebuild_nblist_, 1 * sizeof(*d_rebuild_nblist_)); + gpuErrchk(cudaMallocHost(&p_rebuild_nblist_, 1 * sizeof(*p_rebuild_nblist_))); + + if (!disable_hilbert_) { + this->hilbert_sort_.reset(new HilbertSort(N_)); + } + + this->set_atom_idxs(row_atom_idxs, col_atom_idxs); + + gpuErrchk(cudaMemcpy(d_anchor_idxs_, &anchor_atom_idxs[0], N_ * sizeof(*d_anchor_idxs_), cudaMemcpyHostToDevice)); + + // Create event with timings disabled as timings slow down events + gpuErrchk(cudaEventCreateWithFlags(&nblist_flag_sync_event_, cudaEventDisableTiming)); +}; + +template SmoothcoreNonbondedInteractionGroup::~SmoothcoreNonbondedInteractionGroup() { + gpuErrchk(cudaFree(d_col_atom_idxs_)); + gpuErrchk(cudaFree(d_row_atom_idxs_)); + gpuErrchk(cudaFree(d_anchor_idxs_)); + + gpuErrchk(cudaFree(d_perm_)); + gpuErrchk(cudaFree(d_rescaled_x_)); + gpuErrchk(cudaFree(d_sorted_x_)); + gpuErrchk(cudaFree(d_u_buffer_)); + + gpuErrchk(cudaFree(d_sorted_p_)); + gpuErrchk(cudaFree(d_sorted_du_dx_)); + gpuErrchk(cudaFree(d_sorted_du_dp_)); + + gpuErrchk(cudaFree(d_nblist_x_)); + gpuErrchk(cudaFree(d_nblist_box_)); + gpuErrchk(cudaFree(d_rebuild_nblist_)); + gpuErrchk(cudaFreeHost(p_rebuild_nblist_)); + + gpuErrchk(cudaEventDestroy(nblist_flag_sync_event_)); +}; + +template bool SmoothcoreNonbondedInteractionGroup::needs_sort() { + return steps_since_last_sort_ % STEPS_PER_SORT == 0; +} + +template +void SmoothcoreNonbondedInteractionGroup::sort( + const double *d_coords, const double *d_box, cudaStream_t stream) { + // We must rebuild the neighborlist after sorting, as the neighborlist is tied to a particular sort order + if (!disable_hilbert_) { + this->hilbert_sort_->sort_device(NR_, d_row_atom_idxs_, d_coords, d_box, d_perm_, stream); + this->hilbert_sort_->sort_device(NC_, d_col_atom_idxs_, d_coords, d_box, d_perm_ + NR_, stream); + } else { + gpuErrchk(cudaMemcpyAsync( + d_perm_, d_row_atom_idxs_, NR_ * sizeof(*d_row_atom_idxs_), cudaMemcpyDeviceToDevice, stream)); + gpuErrchk(cudaMemcpyAsync( + d_perm_ + NR_, d_col_atom_idxs_, NC_ * sizeof(*d_col_atom_idxs_), cudaMemcpyDeviceToDevice, stream)); + } + gpuErrchk(cudaMemsetAsync(d_rebuild_nblist_, 1, sizeof(*d_rebuild_nblist_), stream)); + // Set the pinned memory to indicate that we need to rebuild + p_rebuild_nblist_[0] = 1; +} + +template +void SmoothcoreNonbondedInteractionGroup::execute_device( + const int N, + const int P, + const double *d_x, + const double *d_p, // N * PARAMS_PER_ATOM + const double *d_box, // 3 * 3 + unsigned long long *d_du_dx, + unsigned long long *d_du_dp, + __int128 *d_u, + cudaStream_t stream) { + + // (ytz) the nonbonded algorithm proceeds as follows: + + // (done in constructor), construct a hilbert curve mapping each of the HILBERT_GRID_DIM x HILBERT_GRID_DIM x HILBERT_GRID_DIM cells into an index. + // a. decide if we need to rebuild the neighborlist, if so: + // - look up which cell each particle belongs to, and its linear index along the hilbert curve. + // - use radix pair sort keyed on the hilbert index with values equal to the atomic index + // - resulting sorted values is the permutation array. + // - permute coords + // b. else: + // - permute new coords + // c. permute parameters + // d. compute the nonbonded interactions using the neighborlist + // e. inverse permute the forces, du/dps into the original index. + // f. u is buffered into a per-particle array, and then reduced. + + if (N != N_) { + throw std::runtime_error( + "SmoothcoreNonbondedInteractionGroup::execute_device(): expected N == N_, got N=" + std::to_string(N) + + ", N_=" + std::to_string(N_)); + } + + if (P != N_ * PARAMS_PER_ATOM) { + throw std::runtime_error( + "SmoothcoreNonbondedInteractionGroup::execute_device(): expected P == N_*" + + std::to_string(PARAMS_PER_ATOM) + ", got P=" + std::to_string(P) + ", N_*" + + std::to_string(PARAMS_PER_ATOM) + "=" + std::to_string(N_ * PARAMS_PER_ATOM)); + } + + // If the size of the row or cols is none, exit + if (NR_ == 0 || NC_ == 0) { + return; + } + + // initialize rescaled coordinates + gpuErrchk(cudaMemcpyAsync(d_rescaled_x_, d_x, N_ * 3 * sizeof(*d_rescaled_x_), cudaMemcpyDeviceToDevice, stream)); + + const int tpb = DEFAULT_THREADS_PER_BLOCK; + const int B = ceil_divide(N_, tpb); + + k_smoothcore_rescale_coords + <<>>(N_, nblist_.get_row_idxs(), d_anchor_idxs_, d_x, d_p, d_rescaled_x_); + gpuErrchk(cudaPeekAtLastError()); + + const int K = NR_ + NC_; // total number of interactions + const int B_K = ceil_divide(K, tpb); + + if (this->needs_sort()) { + // Sorting always triggers a neighborlist rebuild + this->sort(d_rescaled_x_, d_box, stream); + } else { + // (ytz) see if we need to rebuild the neighborlist. + // Reuse the d_perm_ here to avoid having to make two kernels calls. + k_check_rebuild_coords_and_box_gather<<>>( + NR_ + NC_, d_perm_, d_rescaled_x_, d_nblist_x_, d_box, d_nblist_box_, nblist_padding_, d_rebuild_nblist_); + gpuErrchk(cudaPeekAtLastError()); + // we can optimize this away by doing the check on the GPU directly. + gpuErrchk(cudaMemcpyAsync( + p_rebuild_nblist_, d_rebuild_nblist_, 1 * sizeof(*p_rebuild_nblist_), cudaMemcpyDeviceToHost, stream)); + gpuErrchk(cudaEventRecord(nblist_flag_sync_event_, stream)); + } + + // compute new coordinates/params + k_smoothcore_gather_coords_and_params_zero_w + <<>>(K, d_perm_, d_rescaled_x_, d_p, d_sorted_x_, d_sorted_p_); + gpuErrchk(cudaPeekAtLastError()); + // reset buffers and sorted accumulators + if (d_du_dx || d_du_dp) { + gpuErrchk(cudaMemsetAsync(d_sorted_du_dx_, 0, K * 3 * sizeof(*d_sorted_du_dx_), stream)); + } + if (d_du_dp) { + gpuErrchk(cudaMemsetAsync(d_sorted_du_dp_, 0, K * PARAMS_PER_ATOM * sizeof(*d_sorted_du_dp_), stream)); + } + + // Syncing to an event allows having additional kernels run while we synchronize + // Note that if no event is recorded, this is effectively a no-op, such as in the case of sorting. + gpuErrchk(cudaEventSynchronize(nblist_flag_sync_event_)); + if (p_rebuild_nblist_[0] > 0) { + nblist_.build_nblist_device(K, d_sorted_x_, d_box, cutoff_ + nblist_padding_, stream); + gpuErrchk(cudaMemsetAsync(d_rebuild_nblist_, 0, sizeof(*d_rebuild_nblist_), stream)); + gpuErrchk(cudaMemcpyAsync( + d_nblist_x_, d_rescaled_x_, N * 3 * sizeof(*d_rescaled_x_), cudaMemcpyDeviceToDevice, stream)); + gpuErrchk(cudaMemcpyAsync(d_nblist_box_, d_box, 3 * 3 * sizeof(*d_box), cudaMemcpyDeviceToDevice, stream)); + } + + // look up which kernel we need for this computation + int kernel_idx = 0; + kernel_idx |= d_du_dp ? 1 << 0 : 0; + kernel_idx |= (d_du_dx || d_du_dp) ? 1 << 1 : 0; // we need forces if we request d_du_dp + kernel_idx |= d_u ? 1 << 2 : 0; + + // note that these kernels will no longer trigger the "ALCHEMICAL=1" template option since we've set + // the w-params to be strictly 0. + kernel_ptrs_[kernel_idx]<<>>( + K, + nblist_.get_num_row_idxs(), + nblist_.get_ixn_count(), + d_sorted_x_, + d_sorted_p_, + d_box, + beta_, + cutoff_, + nblist_.get_row_idxs(), + nblist_.get_ixn_tiles(), + nblist_.get_ixn_atoms(), + d_sorted_du_dx_, + d_sorted_du_dp_, + d_u == nullptr ? nullptr : d_u_buffer_ // switch to nullptr if we don't request energies + ); + gpuErrchk(cudaPeekAtLastError()); + + // coords are (N,3) + if (d_du_dx) { + k_smoothcore_rescale_and_accum_du_dx + <<>>(N_, d_anchor_idxs_, d_x, d_p, d_perm_, d_sorted_du_dx_, d_du_dx); + gpuErrchk(cudaPeekAtLastError()); + } + + // params are (N, PARAMS_PER_ATOM) + if (d_du_dp) { + k_smoothcore_rescale_and_accum_du_dp + <<>>(N_, d_anchor_idxs_, d_x, d_p, d_perm_, d_sorted_du_dp_, d_sorted_du_dx_, d_du_dp); + gpuErrchk(cudaPeekAtLastError()); + } + if (d_u) { + accumulate_energy(NONBONDED_KERNEL_BLOCKS, d_u_buffer_, d_u, stream); + } + // Increment steps + steps_since_last_sort_++; +} + +template +void SmoothcoreNonbondedInteractionGroup::set_atom_idxs( + const std::vector &row_atom_idxs, const std::vector &col_atom_idxs) { + + this->validate_idxs(N_, row_atom_idxs, col_atom_idxs, true); + + std::vector unsigned_row_idxs = std::vector(row_atom_idxs.begin(), row_atom_idxs.end()); + std::set unique_row_atom_idxs(unique_idxs(unsigned_row_idxs)); + + std::vector row_atom_idxs_v(set_to_vector(unique_row_atom_idxs)); + std::vector col_atom_idxs_v(col_atom_idxs.begin(), col_atom_idxs.end()); + + cudaStream_t stream = static_cast(0); + if (row_atom_idxs_v.size() == 0 || row_atom_idxs_v.size() == N_) { + this->set_atom_idxs_device(col_atom_idxs_v.size(), row_atom_idxs_v.size(), nullptr, nullptr, stream); + } else { + DeviceBuffer d_col(col_atom_idxs_v); + DeviceBuffer d_row(row_atom_idxs_v); + + this->set_atom_idxs_device(col_atom_idxs_v.size(), row_atom_idxs_v.size(), d_col.data, d_row.data, stream); + } + gpuErrchk(cudaStreamSynchronize(stream)); +} + +// set_atom_idxs_device is for use when idxs exist on the GPU already and are used as the new idxs to compute the neighborlist on. +template +void SmoothcoreNonbondedInteractionGroup::set_atom_idxs_device( + const int NC, + const int NR, + unsigned int *d_in_column_idxs, + unsigned int *d_in_row_idxs, + const cudaStream_t stream) { + + if (NC + NR > N_) { + throw std::runtime_error("number of idxs must be less than or equal to N"); + } + if (NR > 0 && NC > 0) { + const size_t tpb = DEFAULT_THREADS_PER_BLOCK; + + // The indices must already be on the GPU and are copied into the potential's buffers. + gpuErrchk(cudaMemcpyAsync( + d_col_atom_idxs_, d_in_column_idxs, NC * sizeof(*d_col_atom_idxs_), cudaMemcpyDeviceToDevice, stream)); + gpuErrchk(cudaMemcpyAsync( + d_row_atom_idxs_, d_in_row_idxs, NR * sizeof(*d_row_atom_idxs_), cudaMemcpyDeviceToDevice, stream)); + + // The neighborlist does not use the indices directly, rather it takes a contiguous set of indices and the ixn group + // potential will resort the correct particles into the corresponding arrays. We can use the leftover spaces in the + // two d_*_atom_idxs_ arrays to store these nblist indices. + // NOTE: The leftover column indices will store the row indices and vice versa. + k_arange<<>>(NR, d_col_atom_idxs_ + NC); + gpuErrchk(cudaPeekAtLastError()); + k_arange<<>>(NC, d_row_atom_idxs_ + NR, NR); + gpuErrchk(cudaPeekAtLastError()); + + // Resize the nblist + nblist_.resize_device(NC + NR, stream); + + // Force a NBlist rebuild + gpuErrchk(cudaMemsetAsync(d_rebuild_nblist_, 1, 1 * sizeof(*d_rebuild_nblist_), stream)); + + // Offset into the ends of the arrays that now contain the row and column indices for the nblist + nblist_.set_idxs_device(NC, NR, d_row_atom_idxs_ + NR, d_col_atom_idxs_ + NC, stream); + } + + // Update the row and column counts + this->NR_ = NR; + this->NC_ = NC; + // Reset the steps so that we do a new sort + this->steps_since_last_sort_ = 0; +} + +template +void SmoothcoreNonbondedInteractionGroup::du_dp_fixed_to_float( + const int N, const int P, const unsigned long long *du_dp, double *du_dp_float) { + + for (int i = 0; i < N; i++) { + const int idx = i * PARAMS_PER_ATOM; + const int idx_charge = idx + PARAM_OFFSET_CHARGE; + const int idx_sig = idx + PARAM_OFFSET_SIG; + const int idx_eps = idx + PARAM_OFFSET_EPS; + const int idx_w = idx + PARAM_OFFSET_W; + + du_dp_float[idx_charge] = FIXED_TO_FLOAT_DU_DP(du_dp[idx_charge]); + du_dp_float[idx_sig] = FIXED_TO_FLOAT_DU_DP(du_dp[idx_sig]); + du_dp_float[idx_eps] = FIXED_TO_FLOAT_DU_DP(du_dp[idx_eps]); + du_dp_float[idx_w] = FIXED_TO_FLOAT_DU_DP(du_dp[idx_w]); + } +} + +template +void SmoothcoreNonbondedInteractionGroup::validate_idxs( + const int N, const std::vector &row_atom_idxs, const std::vector &col_atom_idxs, const bool allow_empty) { + + if (!allow_empty) { + if (row_atom_idxs.size() == 0) { + throw std::runtime_error("row_atom_idxs must be nonempty"); + } + if (col_atom_idxs.size() == 0) { + throw std::runtime_error("col_atom_idxs must be nonempty"); + } + if (row_atom_idxs.size() == static_cast(N)) { + throw std::runtime_error("must be less then N(" + std::to_string(N) + ") row indices"); + } + if (col_atom_idxs.size() == static_cast(N)) { + throw std::runtime_error("must be less then N(" + std::to_string(N) + ") col indices"); + } + } + verify_atom_idxs(N, row_atom_idxs, allow_empty); + verify_atom_idxs(N, col_atom_idxs, allow_empty); + + // row and col idxs must be disjoint + std::set unique_row_idxs(row_atom_idxs.begin(), row_atom_idxs.end()); + for (int col_atom_idx : col_atom_idxs) { + if (unique_row_idxs.find(col_atom_idx) != unique_row_idxs.end()) { + throw std::runtime_error("row and col indices must be disjoint"); + } + } +} + +template class SmoothcoreNonbondedInteractionGroup; +template class SmoothcoreNonbondedInteractionGroup; + +} // namespace timemachine diff --git a/timemachine/cpp/src/smoothcore_nonbonded_interaction_group.hpp b/timemachine/cpp/src/smoothcore_nonbonded_interaction_group.hpp new file mode 100644 index 0000000000..4bf84e194e --- /dev/null +++ b/timemachine/cpp/src/smoothcore_nonbonded_interaction_group.hpp @@ -0,0 +1,103 @@ +#pragma once + +#include "hilbert_sort.hpp" +#include "neighborlist.hpp" +#include "nonbonded_common.hpp" +#include "potential.hpp" +#include +#include +#include +#include + +namespace timemachine { + +template class SmoothcoreNonbondedInteractionGroup : public Potential { + +private: + const int N_; // total number of atoms, i.e. first dimension of input coords, params + int NR_; // number of row atoms + int NC_; // number of column atoms + + std::array kernel_ptrs_; + + unsigned int *d_col_atom_idxs_; + unsigned int *d_row_atom_idxs_; + unsigned int *d_anchor_idxs_; + int *p_ixn_count_; // pinned memory + + double beta_; + double cutoff_; + // This is safe to overflow, either reset to 0 or increment + unsigned int steps_since_last_sort_; + Neighborlist nblist_; + + const double nblist_padding_; + __int128 *d_u_buffer_; // [NONBONDED_KERNEL_BLOCKS] + double *d_nblist_x_; // coords which were used to compute the nblist + double *d_nblist_box_; // box which was used to rebuild the nblist + int *d_rebuild_nblist_; // whether or not we have to rebuild the nblist + int *p_rebuild_nblist_; // pinned + + unsigned int *d_perm_; // hilbert curve permutation + + // "sorted" means + // - if hilbert sorting enabled, atoms are sorted into contiguous + // blocks by interaction group, and each block is hilbert-sorted + // independently + // - otherwise, atoms are sorted into contiguous blocks by + // interaction group, with arbitrary ordering within each block + double *d_rescaled_x_; // rescaled coordinates + double *d_sorted_x_; // sorted coordinates + double *d_sorted_p_; // sorted parameters + unsigned long long *d_sorted_du_dx_; + unsigned long long *d_sorted_du_dp_; + + std::unique_ptr hilbert_sort_; + + cudaEvent_t nblist_flag_sync_event_; // Event to synchronize rebuild flag on + + const bool disable_hilbert_; + + bool needs_sort(); + + void sort(const double *d_x, const double *d_box, cudaStream_t stream); + + void validate_idxs( + const int N, + const std::vector &row_atom_idxs, + const std::vector &col_atom_idxs, + const bool allow_empty); + +public: + void set_atom_idxs_device( + const int NC, const int NR, unsigned int *d_column_idxs, unsigned int *d_row_idxs, const cudaStream_t stream); + + void set_atom_idxs(const std::vector &row_atom_idxs, const std::vector &col_atom_idxs); + + SmoothcoreNonbondedInteractionGroup( + const int N, + const std::vector &row_atom_idxs, + const std::vector &col_atom_idxs, + const std::vector &anchor_idxs, + const double beta, + const double cutoff, + const bool disable_hilbert_sort = false, + const double nblist_padding = 0.1); + + ~SmoothcoreNonbondedInteractionGroup(); + + virtual void execute_device( + const int N, + const int P, + const double *d_x, + const double *d_p, + const double *d_box, + unsigned long long *d_du_dx, + unsigned long long *d_du_dp, + __int128 *d_u, + cudaStream_t stream) override; + + void du_dp_fixed_to_float(const int N, const int P, const unsigned long long *du_dp, double *du_dp_float) override; +}; + +} // namespace timemachine diff --git a/timemachine/cpp/src/wrap_kernels.cpp b/timemachine/cpp/src/wrap_kernels.cpp index e592178b74..af42874068 100644 --- a/timemachine/cpp/src/wrap_kernels.cpp +++ b/timemachine/cpp/src/wrap_kernels.cpp @@ -40,6 +40,7 @@ #include "segmented_sumexp.hpp" #include "segmented_weighted_random_sampler.hpp" #include "set_utils.hpp" +#include "smoothcore_nonbonded_interaction_group.hpp" #include "summed_potential.hpp" #include "tibd_exchange_move.hpp" #include "translations.hpp" @@ -1582,6 +1583,95 @@ template void declare_nonbonded_interaction_group(py::module )pbdoc"); } +template void declare_smoothcore_nonbonded_interaction_group(py::module &m, const char *typestr) { + using Class = SmoothcoreNonbondedInteractionGroup; + std::string pyclass_name = std::string("SmoothcoreNonbondedInteractionGroup_") + typestr; + py::class_, Potential>( + m, pyclass_name.c_str(), py::buffer_protocol(), py::dynamic_attr()) + .def( + "set_atom_idxs", + &SmoothcoreNonbondedInteractionGroup::set_atom_idxs, + py::arg("row_atom_idxs"), + py::arg("col_atom_idxs"), + R"pbdoc( + Set up the atom idxs for the SmoothcoreNonbondedInteractionGroup. + The interaction is defined between two groups of atom idxs, + `row_atom_idxs` and `col_atom_idxs`. These should be a disjoint + list of idxs. + + Parameters + ---------- + row_atom_idxs: NDArray + First group of atoms in the interaction. + + col_atom_idxs: NDArray + Second group of atoms in the interaction. + + )pbdoc") + .def( + py::init([](const int N, + const py::array_t &row_atom_idxs_i, + const py::array_t &anchor_idxs_i, + const double beta, + const double cutoff, + std::optional> &col_atom_idxs_i, + const bool disable_hilbert_sort, + const double nblist_padding) { + std::vector row_atom_idxs = py_array_to_vector(row_atom_idxs_i); + std::vector anchor_idxs = py_array_to_vector(anchor_idxs_i); + + std::vector col_atom_idxs; + if (col_atom_idxs_i) { + col_atom_idxs.resize(col_atom_idxs_i->size()); + std::memcpy(col_atom_idxs.data(), col_atom_idxs_i->data(), col_atom_idxs_i->size() * sizeof(int)); + } else { + std::set unique_row_atom_idxs(unique_idxs(row_atom_idxs)); + col_atom_idxs = get_indices_difference(N, unique_row_atom_idxs); + } + + return new SmoothcoreNonbondedInteractionGroup( + N, row_atom_idxs, col_atom_idxs, anchor_idxs, beta, cutoff, disable_hilbert_sort, nblist_padding); + }), + py::arg("num_atoms"), + py::arg("row_atom_idxs_i"), + py::arg("anchor_idxs_i"), + py::arg("beta"), + py::arg("cutoff"), + py::arg("col_atom_idxs_i") = py::none(), + py::arg("disable_hilbert_sort") = false, + py::arg("nblist_padding") = 0.1, + R"pbdoc( + Set up the SmoothcoreNonbondedInteractionGroup. + + Parameters + ---------- + num_atoms: int + Number of atoms. + + row_atom_idxs: NDArray + First group of atoms in the interaction. + + anchor_idxs: NDArray + First group of atoms in the interaction. + + beta: float + + cutoff: float + Ignore all interactions beyond this distance in nm. + + col_atom_idxs: Optional[NDArray] + Second group of atoms in the interaction. If not specified, + use all of the atoms not in the `row_atom_idxs`. + + disable_hilbert_sort: bool + Set to True to disable the Hilbert sort. + + nblist_padding: float + Margin for the neighborlist. + + )pbdoc"); +} + template void declare_nonbonded_pair_list(py::module &m, const char *typestr) { using Class = NonbondedPairList; std::string pyclass_name; @@ -2235,6 +2325,9 @@ PYBIND11_MODULE(custom_ops, m) { declare_nonbonded_interaction_group(m, "f64"); declare_nonbonded_interaction_group(m, "f32"); + declare_smoothcore_nonbonded_interaction_group(m, "f64"); + declare_smoothcore_nonbonded_interaction_group(m, "f32"); + declare_nonbonded_precomputed(m, "f64"); declare_nonbonded_precomputed(m, "f32"); diff --git a/timemachine/fe/free_energy.py b/timemachine/fe/free_energy.py index 6aa4195294..9418c3ba07 100644 --- a/timemachine/fe/free_energy.py +++ b/timemachine/fe/free_energy.py @@ -892,6 +892,7 @@ def run_sims_sequential( for initial_state in initial_states: # run simulation traj = sample(initial_state, md_params, max_buffer_frames=100) + print(f"completed simulation at lambda={initial_state.lamb}!") # keep samples from any requested states in memory @@ -904,6 +905,7 @@ def run_sims_sequential( # analysis that depends on current and previous state if prev_state: state_pair = [prev_state, state] + u_kln_by_component = compute_energy_decomposed_u_kln(state_pair) u_kln_by_component_by_lambda.append(u_kln_by_component) @@ -992,6 +994,8 @@ def get_state(lamb: float) -> EnergyDecomposedState[StoredArrays]: assert_potentials_compatible(initial_state.potentials, potentials_0) traj = get_samples(lamb) + + print(f"traj at lambda={initial_state.lamb} coords max {np.amax(traj.frames)}, min {np.min(traj.frames)}") batch_u_fns = get_batch_u_fns(unbound_impls, [p.params for p in initial_state.potentials], temperature) return EnergyDecomposedState(traj.frames, traj.boxes, batch_u_fns) diff --git a/timemachine/fe/rbfe.py b/timemachine/fe/rbfe.py index 644f4c6a58..4b6d348735 100644 --- a/timemachine/fe/rbfe.py +++ b/timemachine/fe/rbfe.py @@ -534,6 +534,7 @@ def estimate_relative_free_energy_bisection( n_windows: Optional[int] = None, min_overlap: Optional[float] = None, min_cutoff: Optional[float] = 0.7, + use_smoothcore: Optional[bool] = False, ) -> SimulationResult: r"""Estimate relative free energy between mol_a and mol_b via independent simulations with a dynamic lambda schedule determined by successively bisecting the lambda interval between the pair of states with the greatest BAR @@ -588,7 +589,7 @@ def estimate_relative_free_energy_bisection( n_windows = DEFAULT_NUM_WINDOWS assert n_windows >= 2 - single_topology = SingleTopology(mol_a, mol_b, core, ff) + single_topology = SingleTopology(mol_a, mol_b, core, ff, use_smoothcore) lambda_interval = lambda_interval or (0.0, 1.0) lambda_min, lambda_max = lambda_interval[0], lambda_interval[1] diff --git a/timemachine/fe/single_topology.py b/timemachine/fe/single_topology.py index 0cb73afc69..9c2614cb90 100644 --- a/timemachine/fe/single_topology.py +++ b/timemachine/fe/single_topology.py @@ -32,7 +32,6 @@ HarmonicAngleStable, HarmonicBond, Nonbonded, - NonbondedInteractionGroup, NonbondedPairListPrecomputed, PeriodicTorsion, ) @@ -1121,6 +1120,66 @@ def __init__(self, mol_a, mol_b, core): self.c_to_a = {v: k for k, v in enumerate(self.a_to_c)} self.c_to_b = {v: k for k, v in enumerate(self.b_to_c)} + def mol_a_idxs(self): + """ + Indices in the combined mol that correspond to an atom in mol_a + """ + idxs = [] + for c_idx, flag in enumerate(self.c_flags): + if flag == AtomMapFlags.CORE or flag == AtomMapFlags.MOL_A: + idxs.append(c_idx) + return np.array(idxs, dtype=np.int32) + + def mol_b_idxs(self): + """ + Indices in the combined mol that correspond to an atom in mol_b + """ + idxs = [] + for c_idx, flag in enumerate(self.c_flags): + if flag == AtomMapFlags.CORE or flag == AtomMapFlags.MOL_B: + idxs.append(c_idx) + return np.array(idxs, dtype=np.int32) + + def core_idxs(self): + """ + Indices corresponding to non-core atoms + """ + idxs = [] + for c_idx, flag in enumerate(self.c_flags): + if flag == AtomMapFlags.CORE: + idxs.append(c_idx) + return np.array(idxs, dtype=np.int32) + + def dummy_idxs(self): + """ + Indices corresponding to non-core atoms + """ + idxs = [] + for c_idx, flag in enumerate(self.c_flags): + if flag != AtomMapFlags.CORE: + idxs.append(c_idx) + return np.array(idxs, dtype=np.int32) + + def dummy_a_idxs(self): + """ + Indices corresponding to non-core atoms in mol_a + """ + idxs = [] + for c_idx, flag in enumerate(self.c_flags): + if flag == AtomMapFlags.MOL_A: + idxs.append(c_idx) + return np.array(idxs, dtype=np.int32) + + def dummy_b_idxs(self): + """ + Indices corresponding to non-core atoms in mol_b + """ + idxs = [] + for c_idx, flag in enumerate(self.c_flags): + if flag == AtomMapFlags.MOL_B: + idxs.append(c_idx) + return np.array(idxs, dtype=np.int32) + def get_num_atoms(self): """ Get the total number of atoms in the alchemical hybrid. @@ -1143,6 +1202,65 @@ def get_num_dummy_atoms(self): """ return self.mol_a.GetNumAtoms() + self.mol_b.GetNumAtoms() - len(self.core) - len(self.core) + def combine_confs(self, x_a, x_b, lamb=1.0): + """ + Combine conformations of two molecules. + + TODO: interpolate confs based on the lambda value? + + Parameters + ---------- + x_a: np.array of shape (N_A,3) + First conformation + + x_b: np.array of shape (N_B,3) + Second conformation + + lamb: optional float + if lamb > 0.5, map atoms from x_a first, then overwrite with x_b, + otherwise use opposite order + + Returns + ------- + np.array of shape (self.num_atoms,3) + Combined conformation + + """ + if lamb < 0.5: + return self.combine_confs_lhs(x_a, x_b) + else: + return self.combine_confs_rhs(x_a, x_b) + + def combine_confs_rhs(self, x_a, x_b): + """ + Combine x_a and x_b conformations for lambda=1 + """ + # place a first, then b overrides a + assert x_a.shape == (self.mol_a.GetNumAtoms(), 3) + assert x_b.shape == (self.mol_b.GetNumAtoms(), 3) + x0 = np.zeros((self.get_num_atoms(), 3)) + for src, dst in enumerate(self.a_to_c): + x0[dst] = x_a[src] + for src, dst in enumerate(self.b_to_c): + x0[dst] = x_b[src] + + return x0 + + def combine_confs_lhs(self, x_a, x_b): + """ + Combine x_a and x_b conformations for lambda=0 + """ + # place b first, then a overrides b + assert x_a.shape == (self.mol_a.GetNumAtoms(), 3) + assert x_b.shape == (self.mol_b.GetNumAtoms(), 3) + x0 = np.zeros((self.get_num_atoms(), 3)) + for src, dst in enumerate(self.b_to_c): + x0[dst] = x_b[src] + for src, dst in enumerate(self.a_to_c): + x0[dst] = x_a[src] + + return x0 + _Bonded = TypeVar("_Bonded", bound=Union[ChiralAtomRestraint, HarmonicAngleStable, HarmonicBond, PeriodicTorsion]) @@ -1313,7 +1431,7 @@ def check_chiral_validity( class SingleTopology(AtomMapMixin): - def __init__(self, mol_a, mol_b, core, forcefield): + def __init__(self, mol_a, mol_b, core, forcefield, use_smoothcore=False): """ SingleTopology combines two molecules through a common core. The combined mol has atom indices laid out such that mol_a is identically mapped to the combined mol indices. @@ -1365,6 +1483,25 @@ def __init__(self, mol_a, mol_b, core, forcefield): self.dst_system.bond.potential.idxs, ) + self.use_smoothcore = use_smoothcore + + def get_smoothcore_ligand_anchor_idxs(self): + ligand_anchor_idxs = np.zeros(self.get_num_atoms(), dtype=np.int32) + + for c_idx, c_flag in enumerate(self.c_flags): + if c_flag == AtomMapFlags.CORE: + ligand_anchor_idxs[c_idx] = c_idx + + for anchor, dummy_group in self.dummy_groups_ab.items(): + for dummy_atom in dummy_group: + ligand_anchor_idxs[self.b_to_c[dummy_atom]] = self.b_to_c[anchor] + + for anchor, dummy_group in self.dummy_groups_ba.items(): + for dummy_atom in dummy_group: + ligand_anchor_idxs[self.a_to_c[dummy_atom]] = self.a_to_c[anchor] + + return ligand_anchor_idxs + def combine_masses(self, use_hmr=False): """ Combine masses between two end-states by taking the heavier of the two core atoms. @@ -1408,65 +1545,6 @@ def combine_masses(self, use_hmr=False): return mol_c_masses - def combine_confs(self, x_a, x_b, lamb=1.0): - """ - Combine conformations of two molecules. - - TODO: interpolate confs based on the lambda value? - - Parameters - ---------- - x_a: np.array of shape (N_A,3) - First conformation - - x_b: np.array of shape (N_B,3) - Second conformation - - lamb: optional float - if lamb > 0.5, map atoms from x_a first, then overwrite with x_b, - otherwise use opposite order - - Returns - ------- - np.array of shape (self.num_atoms,3) - Combined conformation - - """ - if lamb < 0.5: - return self.combine_confs_lhs(x_a, x_b) - else: - return self.combine_confs_rhs(x_a, x_b) - - def combine_confs_rhs(self, x_a, x_b): - """ - Combine x_a and x_b conformations for lambda=1 - """ - # place a first, then b overrides a - assert x_a.shape == (self.mol_a.GetNumAtoms(), 3) - assert x_b.shape == (self.mol_b.GetNumAtoms(), 3) - x0 = np.zeros((self.get_num_atoms(), 3)) - for src, dst in enumerate(self.a_to_c): - x0[dst] = x_a[src] - for src, dst in enumerate(self.b_to_c): - x0[dst] = x_b[src] - - return x0 - - def combine_confs_lhs(self, x_a, x_b): - """ - Combine x_a and x_b conformations for lambda=0 - """ - # place b first, then a overrides b - assert x_a.shape == (self.mol_a.GetNumAtoms(), 3) - assert x_b.shape == (self.mol_b.GetNumAtoms(), 3) - x0 = np.zeros((self.get_num_atoms(), 3)) - for src, dst in enumerate(self.b_to_c): - x0[dst] = x_b[src] - for src, dst in enumerate(self.a_to_c): - x0[dst] = x_a[src] - - return x0 - def _setup_end_state_src(self): """ Setup the source end-state, where mol_a is fully interacting, with mol_b's dummy atoms attached @@ -1777,6 +1855,34 @@ def mol(self, lamb, min_bond_k=100.0) -> Chem.Mol: # make read-only return Chem.Mol(mol) + def _get_smoothcore_guest_params(self, q_handle, lj_handle, lamb: float): + mol_a_charges = q_handle.parameterize(self.mol_a) + mol_b_charges = q_handle.parameterize(self.mol_b) + mol_a_lj = lj_handle.parameterize(self.mol_a) + mol_b_lj = lj_handle.parameterize(self.mol_b) + mol_a_idxs = self.mol_a_idxs() + mol_b_idxs = self.mol_b_idxs() + + charges_src = np.zeros(self.get_num_atoms()) + charges_dst = np.zeros(self.get_num_atoms()) + charges_src[mol_a_idxs] = mol_a_charges[np.array([self.c_to_a[x] for x in mol_a_idxs])] + charges_dst[mol_b_idxs] = mol_b_charges[np.array([self.c_to_b[x] for x in mol_b_idxs])] + + lj_src = np.zeros((self.get_num_atoms(), 2)) + lj_dst = np.zeros((self.get_num_atoms(), 2)) + lj_src[mol_a_idxs] = mol_a_lj[np.array([self.c_to_a[x] for x in mol_a_idxs])] + lj_dst[mol_b_idxs] = mol_b_lj[np.array([self.c_to_b[x] for x in mol_b_idxs])] + + from timemachine.potentials.nonbonded import smoothcore_ligand_param_interpolation + + np.testing.assert_almost_equal(np.sum(charges_src), np.sum(charges_dst), decimal=5) + + ligand_params = smoothcore_ligand_param_interpolation(lamb, charges_src, charges_dst, lj_src, lj_dst, self) + + np.testing.assert_almost_equal(np.sum(charges_src), np.sum(ligand_params[:, 0]), decimal=5) + + return ligand_params + def _get_guest_params(self, q_handle, lj_handle, lamb: float, cutoff: float) -> jax.Array: """ Return an array containing the guest_charges, guest_sigmas, guest_epsilons, guest_w_coords @@ -1861,16 +1967,14 @@ def _parameterize_host_nonbonded(self, host_nonbonded: BoundPotential[Nonbonded] return combined_nonbonded.bind(hg_nb_params) - def _parameterize_host_guest_nonbonded_ixn( + def _parameterize_unbound_host_guest_nonbonded_ixn( self, lamb, host_nonbonded: BoundPotential[Nonbonded], num_water_atoms: int - ) -> BoundPotential[NonbondedInteractionGroup]: + ) -> tuple: """Parameterize nonbonded interactions between the host and guest""" num_host_atoms = host_nonbonded.params.shape[0] num_guest_atoms = self.get_num_atoms() cutoff = host_nonbonded.potential.cutoff - guest_ixn_env_params = self._get_guest_params(self.ff.q_handle, self.ff.lj_handle, lamb, cutoff) - # L-W terms num_other_atoms = num_host_atoms - num_water_atoms @@ -1886,17 +1990,39 @@ def get_other_idxs(): def get_env_idxs(): return np.array(list(get_other_idxs()) + list(get_water_idxs()), dtype=np.int32) - ixn_pot, ixn_params = get_ligand_ixn_pots_params( - get_lig_idxs(), - get_env_idxs(), - host_nonbonded.params, - guest_ixn_env_params, - beta=host_nonbonded.potential.beta, - cutoff=cutoff, - ) + from timemachine.fe.topology import get_smoothcore_ligand_ixn_pots_params + + if self.use_smoothcore: + print("WARNING: Using smoothcore nonbonded interactions...") + guest_ixn_env_params = self._get_smoothcore_guest_params(self.ff.q_handle, self.ff.lj_handle, lamb) + ixn_pot, ixn_params = get_smoothcore_ligand_ixn_pots_params( + get_lig_idxs(), + self.get_smoothcore_ligand_anchor_idxs(), + get_env_idxs(), + host_nonbonded.params, + guest_ixn_env_params, + beta=host_nonbonded.potential.beta, + cutoff=cutoff, + ) + else: + guest_ixn_env_params = self._get_guest_params(self.ff.q_handle, self.ff.lj_handle, lamb, cutoff) + ixn_pot, ixn_params = get_ligand_ixn_pots_params( + get_lig_idxs(), + get_env_idxs(), + host_nonbonded.params, + guest_ixn_env_params, + beta=host_nonbonded.potential.beta, + cutoff=cutoff, + ) + + return ixn_pot, ixn_params + + def _parameterize_host_guest_nonbonded_ixn( + self, lamb, host_nonbonded: BoundPotential[Nonbonded], num_water_atoms: int + ): + ixn_pot, ixn_params = self._parameterize_unbound_host_guest_nonbonded_ixn(lamb, host_nonbonded, num_water_atoms) - bound_ixn_pot = ixn_pot.bind(ixn_params) - return bound_ixn_pot + return ixn_pot.bind(ixn_params) def combine_with_host(self, host_system: VacuumSystem, lamb: float, num_water_atoms: int) -> HostGuestSystem: """ @@ -1915,7 +2041,7 @@ def combine_with_host(self, host_system: VacuumSystem, lamb: float, num_water_at Parameters ---------- host_system: VacuumSystem - Parameterized system of the host + Parameterized host system. lamb: float Which lambda value we want to generate the combined system. diff --git a/timemachine/fe/topology.py b/timemachine/fe/topology.py index 23c4725686..8702935acb 100644 --- a/timemachine/fe/topology.py +++ b/timemachine/fe/topology.py @@ -696,7 +696,7 @@ def get_ligand_ixn_pots_params( guest_params_ixn_env: Params, beta=2.0, cutoff=1.2, -) -> Tuple[potentials.NonbondedInteractionGroup, Params]: +): """ Return the interaction group potentials and corresponding parameters for the ligand-water and ligand-protein interaction terms. @@ -725,7 +725,7 @@ def get_ligand_ixn_pots_params( num_lig_atoms = len(lig_idxs) num_total_atoms = num_lig_atoms + len(env_idxs) - hg_ixn_pot = potentials.NonbondedInteractionGroup( + ixn_pot = potentials.NonbondedInteractionGroup( num_total_atoms, lig_idxs, beta, @@ -734,4 +734,63 @@ def get_ligand_ixn_pots_params( ) hg_ixn_params = jnp.concatenate([host_nb_params, guest_params_ixn_env]) - return hg_ixn_pot, hg_ixn_params + + return ixn_pot, hg_ixn_params + + +def get_smoothcore_ligand_ixn_pots_params( + lig_idxs: NDArray, + lig_anchor_idxs: NDArray, + env_idxs: Optional[NDArray], + host_nb_params: Params, + guest_params_ixn_env: Params, + beta=2.0, + cutoff=1.2, +): + """ + Return the interaction group potentials and corresponding parameters + for the ligand-water and ligand-protein interaction terms. + + Parameters + ---------- + lig_idxs: + List of ligand indexes (dtype, np.int32), one for each ligand. + + lig_anchor_idxs: + List of ligand anchor indices denoting the origin for each atom. + + env_idxs: + Indexes for the environment atoms including waters. + May be None if there are no other atoms. + + host_nb_params: + Nonbonded parameters for the host (environment) atoms. + + guest_params_ixn_env: + Parameters for the guest (ligand) NB interactions with the + environment atoms. + """ + + # Init + env_idxs = env_idxs if env_idxs is not None else np.array([]) + + # Ligand-Env terms + num_lig_atoms = len(lig_idxs) + num_host_atoms = len(env_idxs) + num_total_atoms = num_lig_atoms + len(env_idxs) + + lig_anchor_idxs += num_host_atoms + anchor_idxs = np.concatenate([np.arange(num_host_atoms), lig_anchor_idxs]) + + ixn_pot = potentials.SmoothcoreNonbondedInteractionGroup( + num_total_atoms, + lig_idxs.astype(np.int32), + anchor_idxs.astype(np.int32), + beta, + cutoff, + col_atom_idxs=env_idxs.astype(np.int32), + ) + + hg_ixn_params = jnp.concatenate([host_nb_params, guest_params_ixn_env]) + + return ixn_pot, hg_ixn_params diff --git a/timemachine/ff/handlers/nonbonded.py b/timemachine/ff/handlers/nonbonded.py index 37ccc91ee2..826c83ad7d 100644 --- a/timemachine/ff/handlers/nonbonded.py +++ b/timemachine/ff/handlers/nonbonded.py @@ -698,6 +698,7 @@ def static_parameterize(params, smirks, mol): assert q_params.shape[0] == mol.GetNumAtoms() # check that return shape is consistent with input mol return q_params + # return jnp.zeros_like(q_params) class AM1CCCIntraHandler(AM1CCCHandler): diff --git a/timemachine/lib/custom_ops.pyi b/timemachine/lib/custom_ops.pyi index f91fabf372..aae4c00524 100644 --- a/timemachine/lib/custom_ops.pyi +++ b/timemachine/lib/custom_ops.pyi @@ -240,6 +240,14 @@ class SegmentedWeightedRandomSampler_f64: def __init__(self, max_vals_per_segment: int, segments: int, seed: int) -> None: ... def sample(self, weights: List[List[float]]) -> List[int]: ... +class SmoothcoreNonbondedInteractionGroup_f32(Potential): + def __init__(self, num_atoms: int, row_atom_idxs_i: numpy.typing.NDArray[numpy.int32], anchor_idxs_i: numpy.typing.NDArray[numpy.int32], beta: float, cutoff: float, col_atom_idxs_i: Optional[numpy.typing.NDArray[numpy.int32]] = ..., disable_hilbert_sort: bool = ..., nblist_padding: float = ...) -> None: ... + def set_atom_idxs(self, row_atom_idxs: List[int], col_atom_idxs: List[int]) -> None: ... + +class SmoothcoreNonbondedInteractionGroup_f64(Potential): + def __init__(self, num_atoms: int, row_atom_idxs_i: numpy.typing.NDArray[numpy.int32], anchor_idxs_i: numpy.typing.NDArray[numpy.int32], beta: float, cutoff: float, col_atom_idxs_i: Optional[numpy.typing.NDArray[numpy.int32]] = ..., disable_hilbert_sort: bool = ..., nblist_padding: float = ...) -> None: ... + def set_atom_idxs(self, row_atom_idxs: List[int], col_atom_idxs: List[int]) -> None: ... + class SummedPotential(Potential): def __init__(self, potentials: List[Potential], params_sizes: List[int], parallel: bool = ...) -> None: ... def get_potentials(self) -> List[Potential]: ... diff --git a/timemachine/potentials/__init__.py b/timemachine/potentials/__init__.py index f19ad256a4..40e5973560 100644 --- a/timemachine/potentials/__init__.py +++ b/timemachine/potentials/__init__.py @@ -16,6 +16,7 @@ NonbondedPairList, NonbondedPairListPrecomputed, PeriodicTorsion, + SmoothcoreNonbondedInteractionGroup, SummedPotential, make_summed_potential, ) @@ -36,6 +37,7 @@ "Nonbonded", "NonbondedAllPairs", "NonbondedInteractionGroup", + "SmoothcoreNonbondedInteractionGroup", "NonbondedPairList", "NonbondedExclusions", "NonbondedPairListPrecomputed", diff --git a/timemachine/potentials/nonbonded.py b/timemachine/potentials/nonbonded.py index c32b7fcaba..58363870f7 100644 --- a/timemachine/potentials/nonbonded.py +++ b/timemachine/potentials/nonbonded.py @@ -126,8 +126,8 @@ def nonbonded_block_unsummed( dij = delta_r(ri, rj, box) dij = jnp.concatenate([dij, (w_i - w_j).reshape(*dij.shape[:-1], 1)], axis=-1) - dij = jnp.linalg.norm(dij, axis=-1) + sig_i = jnp.expand_dims(params_i[:, 1], axis=1) sig_j = jnp.expand_dims(params_j[:, 1], axis=0) eps_i = jnp.expand_dims(params_i[:, 2], axis=1) @@ -143,7 +143,7 @@ def nonbonded_block_unsummed( es = switched_direct_space_pme(dij, qij, beta, cutoff) lj = lennard_jones(dij, sig_ij, eps_ij) - + lj = jnp.where(eps_ij != 0, lj, 0) # needed for self consistent du_deps w.r.t. GPU nrgs = jnp.where(dij < cutoff, es + lj, 0) return cast(Array, nrgs) @@ -155,6 +155,56 @@ def nonbonded_block(xi, xj, box, params_i, params_j, beta, cutoff): return jnp.sum(nonbonded_block_unsummed(xi, xj, box, params_i, params_j, beta, cutoff)) +import itertools + + +def nonbonded_block_rescaled(xi, xj, box, params_i, params_j, beta, cutoff, anchors_i, groups_i): + """ + + Soft-core variation where w_i is applied by means of rescaling dummy atoms relative to an + arbitrary choice of the anchoring atom. If w_i is 0, then atom i is fully interacting. + If w_i is 1, then atom_i is fully non-interacting. If 0 < w_i < 1 then atom_i is scaled + relative to the location of the anchoring atom for the group that atom_i belongs to. Atoms + that are not part of the groups_i ignore their w_i values. + + Note that we assume that charges have *already* been balanced correctly in the upstream + interpolation code. + + Example: + + anchors_i = [0, 5, 9] + groups_i = [[3,4], [7], [6,8,2]] + w_i = [[0.2, 0.2], [0.8], [0.2, 0.2]] + + """ + + xi = jnp.array(xi) + params_i = jnp.array(params_i) + + assert len(anchors_i) == len(groups_i) + + # transform xi + xi_a = xi[np.repeat(anchors_i, [len(x) for x in groups_i])] + + group_idxs = jnp.array(list(itertools.chain(*groups_i))) + + excluded_idxs = list(set(range(len(xi))).difference(group_idxs.tolist())) + excluded_idxs = jnp.array(excluded_idxs) + + np.testing.assert_array_equal(params_i[excluded_idxs, -1], 0) + + xi_g = xi[group_idxs] + scale_i = jnp.expand_dims(1 - params_i[:, -1], axis=-1)[group_idxs] + # compute the displacement vector + r_ga = scale_i * (xi_g - xi_a) + xi = xi.at[group_idxs].set(xi_g + r_ga) + + # set q,s,e,w all to zero if scale_i is exactly zero. + params_i = params_i.at[jnp.argwhere(scale_i <= 0)].set(0) + + return nonbonded_block(xi, xj, box, params_i, params_j, beta, cutoff) + + def convert_exclusions_to_rescale_masks(exclusion_idxs, scales, N): """Converts exclusions from list format used in Nonbonded to mask format used in `nonbonded`""" @@ -881,3 +931,104 @@ def lj_interaction_group_energy(sig_ligand, eps_ligand, lj_prefactors): projection = vmap(basis_expand_lj_atom)(sig_ligand, eps_ligand) return jnp.sum(projection * lj_prefactors) + + +def smoothcore_charge_interpolation(lamb, charges_src, charges_dst, mol_a_idxs, mol_b_idxs): + # absorb then shrink + + net_charge_src = np.sum(charges_src) + net_charge_dst = np.sum(charges_dst) + + np.testing.assert_almost_equal(net_charge_src, net_charge_dst, decimal=5) + charges_interpolated = np.zeros_like(charges_src) + + charges_interpolated[mol_a_idxs] += (1 - lamb) * charges_src[mol_a_idxs] + charges_interpolated[mol_b_idxs] += lamb * charges_dst[mol_b_idxs] + + np.testing.assert_almost_equal(np.sum(charges_interpolated), np.sum(net_charge_src), decimal=5) + + return charges_interpolated + + +# (ytz): guesstimate of the minimal numerically stable eps/sig pair that is 1-step removable +# MIN_SMOOTHCORE_SIG_HALF = 0.05 +# MIN_SMOOTHCORE_EPS_SQRT = 0.10 + +MIN_SMOOTHCORE_SIG_HALF = 0.0 # additive combining rule +MIN_SMOOTHCORE_EPS_SQRT = 0.0 # multiplicate combining rule + + +def smoothcore_lj_interpolation(lamb, lj_src, lj_dst, mol_a_idxs, mol_b_idxs): + MIN_SIG_EPS = np.where(lamb > 0 and lamb < 1, [MIN_SMOOTHCORE_SIG_HALF, MIN_SMOOTHCORE_EPS_SQRT], [0.0, 0.0]) + # core atoms are interpolated linearly + # dummy atoms [sig,eps] are scaled down to [min(anchor_sig, dummy_sig), min(anchor_eps, dummy_eps)] except + # at the end-points, where they're both zero. + lj_src_stable = np.repeat([MIN_SIG_EPS], len(lj_src), axis=0) + lj_dst_stable = np.repeat([MIN_SIG_EPS], len(lj_dst), axis=0) + + lj_src_stable[mol_a_idxs] = lj_src[mol_a_idxs] + lj_dst_stable[mol_b_idxs] = lj_dst[mol_b_idxs] + lj_interpolated = np.repeat([MIN_SIG_EPS], len(lj_src), axis=0) + + lj_interpolated[mol_a_idxs] += np.clip((1 - lamb) * 10, 0, 1) * lj_src[mol_a_idxs] + lj_interpolated[mol_b_idxs] += np.clip(lamb * 10, 0, 1) * lj_dst[mol_b_idxs] + + return lj_interpolated + + +def smoothcore_ligand_param_interpolation(lamb, charges_src, charges_dst, lj_src, lj_dst, amm): + # When turning the dummy atoms, first turn on the parameters to full strength while in a shrunken state + # Then expand out the dummy atoms slowly + q_lamb = lamb + s_lamb = lamb + w_lamb = lamb + # p_lamb = np.clip(2 * lamb, 0, 1) + # w_lamb = np.clip((lamb - 0.5)*2, 0, 1) + + mol_a_idxs = amm.mol_a_idxs() + mol_b_idxs = amm.mol_b_idxs() + dummy_a_idxs = amm.dummy_a_idxs() + dummy_b_idxs = amm.dummy_b_idxs() + + charges = smoothcore_charge_interpolation(q_lamb, charges_src, charges_dst, mol_a_idxs, mol_b_idxs) + charges = charges.reshape((-1, 1)) + sig_eps = smoothcore_lj_interpolation(s_lamb, lj_src, lj_dst, mol_a_idxs, mol_b_idxs) + w_offsets = np.zeros(amm.get_num_atoms()) + w_offsets[dummy_a_idxs] = w_lamb + w_offsets[dummy_b_idxs] = 1 - w_lamb + w_offsets = w_offsets.reshape((-1, 1)) + ligand_params = np.hstack([charges, sig_eps, w_offsets]) + + return ligand_params + + +def smoothcore_nonbonded_interaction_group(conf, params, box, row_atom_idxs, col_atom_idxs, anchor_idxs, beta, cutoff): + conf = jnp.array(conf) + params = jnp.array(params) + + anchor_confs = conf[anchor_idxs] + rescale_factors = jnp.expand_dims(1 - params[:, -1], axis=-1) + conf = anchor_confs + rescale_factors * (conf - anchor_confs) + + row_xs = conf[row_atom_idxs] + col_xs = conf[col_atom_idxs] + + # reset w_coords + params = params.at[:, -1].set(0) + row_params = jnp.array(params[row_atom_idxs]) + col_params = jnp.array(params[col_atom_idxs]) + + return nonbonded_block(row_xs, col_xs, box, row_params, col_params, beta, cutoff) + + +def reference_nonbonded_interaction_group(conf, params, box, row_atom_idxs, col_atom_idxs, beta, cutoff): + conf = jnp.array(conf) + params = jnp.array(params) + + row_params = jnp.array(params[row_atom_idxs]) + col_params = jnp.array(params[col_atom_idxs]) + + row_xs = conf[row_atom_idxs] + col_xs = conf[col_atom_idxs] + + return nonbonded_block(row_xs, col_xs, box, row_params, col_params, beta, cutoff) diff --git a/timemachine/potentials/potentials.py b/timemachine/potentials/potentials.py index 6e7a2af5bf..78e15048a9 100644 --- a/timemachine/potentials/potentials.py +++ b/timemachine/potentials/potentials.py @@ -163,8 +163,6 @@ class NonbondedInteractionGroup(Potential): nblist_padding: float = 0.1 def __call__(self, conf: Conf, params: Params, box: Optional[Box]) -> float | Array: - num_atoms, _ = jnp.array(conf).shape - vdW, electrostatics = nonbonded.nonbonded_interaction_groups( conf, params, @@ -177,6 +175,30 @@ def __call__(self, conf: Conf, params: Params, box: Optional[Box]) -> float | Ar return jnp.sum(vdW) + jnp.sum(electrostatics) +@dataclass +class SmoothcoreNonbondedInteractionGroup(Potential): + num_atoms: int + row_atom_idxs: NDArray[np.int32] + anchor_idxs: NDArray[np.int32] + beta: float + cutoff: float + col_atom_idxs: Optional[NDArray[np.int32]] = None + disable_hilbert_sort: bool = False + nblist_padding: float = 0.1 + + def __call__(self, conf: Conf, params: Params, box: Optional[Box]) -> float | Array: + return nonbonded.smoothcore_nonbonded_interaction_group( + conf, + params, + box, + self.row_atom_idxs, + self.col_atom_idxs, + self.anchor_idxs, + self.beta, + self.cutoff, + ) + + @dataclass class NonbondedPairList(Potential): idxs: NDArray[np.int32] diff --git a/timemachine/testsystems/data/benzyl.sdf b/timemachine/testsystems/data/benzyl.sdf new file mode 100644 index 0000000000..dda43f1056 --- /dev/null +++ b/timemachine/testsystems/data/benzyl.sdf @@ -0,0 +1,274 @@ +benzene + 3D + Structure written by MMmdl. + 12 12 0 0 1 0 999 V2000 + -0.0059 0.0056 -0.0624 C 0 0 0 0 0 0 + 0.0329 -0.6666 -1.2782 C 0 0 0 0 0 0 + 0.0664 -2.0558 -1.3034 C 0 0 0 0 0 0 + 0.0612 -2.7728 -0.1128 C 0 0 0 0 0 0 + 0.0224 -2.1005 1.1030 C 0 0 0 0 0 0 + -0.0111 -0.7114 1.1282 C 0 0 0 0 0 0 + -0.0320 1.0872 -0.0428 H 0 0 0 0 0 0 + 0.0370 -0.1084 -2.2052 H 0 0 0 0 0 0 + 0.0966 -2.5792 -2.2500 H 0 0 0 0 0 0 + 0.0873 -3.8543 -0.1324 H 0 0 0 0 0 0 + 0.0183 -2.6587 2.0300 H 0 0 0 0 0 0 + -0.0413 -0.1880 2.0748 H 0 0 0 0 0 0 + 1 2 1 0 0 0 + 1 6 2 0 0 0 + 1 7 1 0 0 0 + 2 3 2 0 0 0 + 2 8 1 0 0 0 + 3 4 1 0 0 0 + 3 9 1 0 0 0 + 4 5 2 0 0 0 + 4 10 1 0 0 0 + 5 6 1 0 0 0 + 5 11 1 0 0 0 + 6 12 1 0 0 0 +M END +> +1 + +> +entry + +> +0 + +> +0 + +$$$$ +phenol + 3D + Structure written by MMmdl. + 13 13 0 0 1 0 999 V2000 + -0.0059 0.0056 -0.0624 C 0 0 0 0 0 0 + 0.0329 -0.6666 -1.2782 C 0 0 0 0 0 0 + 0.0664 -2.0558 -1.3034 C 0 0 0 0 0 0 + 0.0612 -2.7728 -0.1128 C 0 0 0 0 0 0 + 0.0224 -2.1005 1.1030 C 0 0 0 0 0 0 + -0.0111 -0.7114 1.1282 C 0 0 0 0 0 0 + -0.0320 1.0872 -0.0428 H 0 0 0 0 0 0 + 0.0370 -0.1084 -2.2052 H 0 0 0 0 0 0 + 0.0966 -2.5792 -2.2500 H 0 0 0 0 0 0 + 0.0873 -3.8543 -0.1324 H 0 0 0 0 0 0 + 0.0183 -2.6587 2.0300 H 0 0 0 0 0 0 + -0.0492 -0.0516 2.3214 O 0 0 0 0 0 0 + -0.0475 -0.6884 3.0399 H 0 0 0 0 0 0 + 1 2 1 0 0 0 + 1 6 2 0 0 0 + 1 7 1 0 0 0 + 2 3 2 0 0 0 + 2 8 1 0 0 0 + 3 4 1 0 0 0 + 3 9 1 0 0 0 + 4 5 2 0 0 0 + 4 10 1 0 0 0 + 5 6 1 0 0 0 + 5 11 1 0 0 0 + 6 12 1 0 0 0 + 12 13 1 0 0 0 +M END +> +2 + +> +entry + +> +0 + +> +0 + +$$$$ +benzene_t_butyl + 3D + Structure written by MMmdl. + 24 24 0 0 1 0 999 V2000 + -0.0059 0.0056 -0.0624 C 0 0 0 0 0 0 + 0.0329 -0.6666 -1.2782 C 0 0 0 0 0 0 + 0.0664 -2.0558 -1.3034 C 0 0 0 0 0 0 + 0.0612 -2.7728 -0.1128 C 0 0 0 0 0 0 + 0.0224 -2.1005 1.1030 C 0 0 0 0 0 0 + -0.0111 -0.7114 1.1282 C 0 0 0 0 0 0 + -0.0320 1.0872 -0.0428 H 0 0 0 0 0 0 + 0.0370 -0.1084 -2.2052 H 0 0 0 0 0 0 + 0.0966 -2.5792 -2.2500 H 0 0 0 0 0 0 + 0.0873 -3.8543 -0.1324 H 0 0 0 0 0 0 + 0.0183 -2.6587 2.0300 H 0 0 0 0 0 0 + -0.0541 0.0335 2.4754 C 0 0 0 0 0 0 + -0.0514 -0.9879 3.6279 C 0 0 0 0 0 0 + 1.1796 0.9471 2.5973 C 0 0 0 0 0 0 + -1.3336 0.8873 2.5502 C 0 0 0 0 0 0 + 1.1492 1.4743 3.5508 H 0 0 0 0 0 0 + 2.0858 0.3437 2.5451 H 0 0 0 0 0 0 + 1.1783 1.6709 1.7823 H 0 0 0 0 0 0 + -1.3640 1.4145 3.5037 H 0 0 0 0 0 0 + -1.3362 1.6111 1.7352 H 0 0 0 0 0 0 + -2.2075 0.2415 2.4646 H 0 0 0 0 0 0 + -0.0818 -0.4607 4.5814 H 0 0 0 0 0 0 + -0.9246 -1.6347 3.5429 H 0 0 0 0 0 0 + 0.8542 -1.5924 3.5762 H 0 0 0 0 0 0 + 1 2 1 0 0 0 + 1 6 2 0 0 0 + 1 7 1 0 0 0 + 2 3 2 0 0 0 + 2 8 1 0 0 0 + 3 4 1 0 0 0 + 3 9 1 0 0 0 + 4 5 2 0 0 0 + 4 10 1 0 0 0 + 5 6 1 0 0 0 + 5 11 1 0 0 0 + 6 12 1 0 0 0 + 12 13 1 0 0 0 + 12 14 1 0 0 0 + 12 15 1 0 0 0 + 13 22 1 0 0 0 + 13 23 1 0 0 0 + 13 24 1 0 0 0 + 14 16 1 0 0 0 + 14 17 1 0 0 0 + 14 18 1 0 0 0 + 15 19 1 0 0 0 + 15 20 1 0 0 0 + 15 21 1 0 0 0 +M END +> +3 + +> +entry + +> +0 + +> +0 + +$$$$ +benzene_t_oh + 3D + Structure written by MMmdl. + 18 18 0 0 1 0 999 V2000 + -0.0059 0.0056 -0.0624 C 0 0 0 0 0 0 + 0.0329 -0.6666 -1.2782 C 0 0 0 0 0 0 + 0.0664 -2.0558 -1.3034 C 0 0 0 0 0 0 + 0.0612 -2.7728 -0.1128 C 0 0 0 0 0 0 + 0.0224 -2.1005 1.1030 C 0 0 0 0 0 0 + -0.0111 -0.7114 1.1282 C 0 0 0 0 0 0 + -0.0320 1.0872 -0.0428 H 0 0 0 0 0 0 + 0.0370 -0.1084 -2.2052 H 0 0 0 0 0 0 + 0.0966 -2.5792 -2.2500 H 0 0 0 0 0 0 + 0.0873 -3.8543 -0.1324 H 0 0 0 0 0 0 + 0.0183 -2.6587 2.0300 H 0 0 0 0 0 0 + -0.0541 0.0335 2.4754 C 0 0 0 0 0 0 + -0.0516 -0.9017 3.5306 O 0 0 0 0 0 0 + 1.0755 0.8700 2.5870 O 0 0 0 0 0 0 + -1.2256 0.8152 2.5439 O 0 0 0 0 0 0 + -1.2524 1.2796 3.3837 H 0 0 0 0 0 0 + 1.0487 1.3343 3.4268 H 0 0 0 0 0 0 + -0.0784 -0.4374 4.3704 H 0 0 0 0 0 0 + 1 2 1 0 0 0 + 1 6 2 0 0 0 + 1 7 1 0 0 0 + 2 3 2 0 0 0 + 2 8 1 0 0 0 + 3 4 1 0 0 0 + 3 9 1 0 0 0 + 4 5 2 0 0 0 + 4 10 1 0 0 0 + 5 6 1 0 0 0 + 5 11 1 0 0 0 + 6 12 1 0 0 0 + 12 13 1 0 0 0 + 12 14 1 0 0 0 + 12 15 1 0 0 0 + 13 18 1 0 0 0 + 14 17 1 0 0 0 + 15 16 1 0 0 0 +M END +> +4 + +> +entry + +> +0 + +> +0 + +$$$$ +benzene_bicycle + 3D + Structure written by MMmdl. + 24 26 0 0 1 0 999 V2000 + -0.0059 0.0055 -0.0625 C 0 0 0 0 0 0 + 0.0329 -0.6665 -1.2782 C 0 0 0 0 0 0 + 0.0664 -2.0560 -1.3034 C 0 0 0 0 0 0 + 0.0612 -2.7727 -0.1128 C 0 0 0 0 0 0 + 0.0224 -2.1008 1.1028 C 0 0 0 0 0 0 + -0.0114 -0.7056 1.1357 C 0 0 0 0 0 0 + -0.0320 1.0871 -0.0428 H 0 0 0 0 0 0 + 0.0370 -0.1085 -2.2050 H 0 0 0 0 0 0 + 0.0966 -2.5791 -2.2500 H 0 0 0 0 0 0 + 0.0873 -3.8540 -0.1325 H 0 0 0 0 0 0 + 0.0183 -2.6587 2.0299 H 0 0 0 0 0 0 + -0.0527 -0.0266 2.4813 C 0 0 0 0 0 0 + -0.0587 -0.6834 3.6300 N 0 0 0 0 0 0 + -0.0982 0.2636 4.5389 C 0 0 0 0 0 0 + -0.1176 1.5222 3.9853 C 0 0 0 0 0 0 + -0.0879 1.3408 2.6283 O 0 0 0 0 0 0 + -0.1263 0.2547 6.0480 C 0 0 0 0 0 0 + -0.1631 2.6102 5.0358 C 0 0 0 0 0 0 + -0.1684 1.7629 6.3928 C 0 0 0 0 0 0 + -1.0125 -0.2801 6.4170 H 0 0 0 0 0 0 + 0.7698 -0.2379 6.4504 H 0 0 0 0 0 0 + -0.2062 2.4033 7.5965 O 0 0 0 0 0 0 + -0.1821 3.6481 4.7378 H 0 0 0 0 0 0 + -0.2260 3.3525 7.4541 H 0 0 0 0 0 0 + 1 2 1 0 0 0 + 1 6 2 0 0 0 + 1 7 1 0 0 0 + 2 3 2 0 0 0 + 2 8 1 0 0 0 + 3 4 1 0 0 0 + 3 9 1 0 0 0 + 4 5 2 0 0 0 + 4 10 1 0 0 0 + 5 6 1 0 0 0 + 5 11 1 0 0 0 + 6 12 1 0 0 0 + 12 13 2 0 0 0 + 12 16 1 0 0 0 + 13 14 1 0 0 0 + 14 15 2 0 0 0 + 14 17 1 0 0 0 + 15 16 1 0 0 0 + 15 18 1 0 0 0 + 17 19 1 0 0 0 + 17 20 1 0 0 0 + 17 21 1 0 0 0 + 18 19 2 0 0 0 + 18 23 1 0 0 0 + 19 22 1 0 0 0 + 22 24 1 0 0 0 +M END +> +5 + +> +entry + +> +0 + +> +0 + +$$$$