diff --git a/openmm_ramd/analyze/analyze_log.py b/openmm_ramd/analyze/analyze_log.py deleted file mode 100644 index b5e60fd..0000000 --- a/openmm_ramd/analyze/analyze_log.py +++ /dev/null @@ -1,118 +0,0 @@ -""" -analyze_log.py - -Read and parse a RAMD log file. Construct a series of milestoning models from -the trajectory fragments for a set of bins of RAMD force vectors. The -information from the milestoning models will be used in a sigma-RAMD model -that will allow one to expand the residence time in a Taylor series expansion -according to sigma-RAMD theory. -""" - -import sys -import argparse - -import numpy as np - -import parser -import milestoning -import sigma_ramd - -def analyze_log(ramd_log_file_list, num_milestones=10, num_xi_bins=5, - verbose=True): - if verbose: - print(f"Using {num_milestones} milestones.") - print(f"Using {num_xi_bins} angle bins.") - xi_bins_ranges = np.linspace(-1.0, 1.0, num_xi_bins+1) - trajectory_list = [] - forceOutFreq = None - forceRAMD = None - timeStep = None - for ramd_log_filename in ramd_log_file_list: - trajectories, forceOutFreq_, forceRAMD_, timeStep_ \ - = parser.parse_ramd_log_file(ramd_log_filename) - if forceOutFreq is None: - forceOutFreq = forceOutFreq_ - forceRAMD = forceRAMD_ - timeStep = timeStep_ - else: - assert forceOutFreq == forceOutFreq_, "RAMD logs contain differing forceOutFreq values." - assert forceRAMD == forceRAMD_, "RAMD logs contain differing forceRAMD values." - assert timeStep == timeStep_, "RAMD logs contain differing timeStep values." - - trajectory_list += trajectories - - if verbose: - print("Number of trajectories extracted from log file(s):", - len(trajectory_list)) - print("forceOutFreq:", forceOutFreq) - print("forceRAMD:", forceRAMD, "kcal/(mole * Angstrom)") - print("timeStep:", timeStep, "ps") - - # Each trajectory represents a set of frames between force changes - # Align the trajectories and convert to 1D, and xi values - one_dim_trajs = parser.condense_trajectories(trajectories) - - #Construct a milestoning model based on trajectory fragments - milestones, min_location, max_location, starting_cv_val \ - = milestoning.uniform_milestone_locations(one_dim_trajs, num_milestones) - - xi_bins = [] - for j in range(num_xi_bins): - xi_bins.append([]) - - for i, trajectory in enumerate(one_dim_trajs): - traj_xi = trajectory[0][1] - found_bin = False - for j in range(num_xi_bins): - if (traj_xi >= xi_bins_ranges[j]) \ - and (traj_xi <= xi_bins_ranges[j+1]): - xi_bins[j].append(trajectory) - found_bin = True - break - - if not found_bin: - raise Exception(f"xi value {traj_xi} not placed in bin.") - - frame_time = timeStep * forceOutFreq - xi_bin_time_profiles = np.zeros((num_xi_bins, num_milestones, num_milestones)) - for i in range(num_xi_bins): - if verbose: - print(f"num in bin {i}: {len(xi_bins[i])}") - xi_bin_trajs = xi_bins[i] - count_matrix, time_vector, rate_matrix, transition_matrix \ - = milestoning.make_milestoning_model(xi_bin_trajs, milestones, frame_time, num_milestones) - - # use the milestoning model to construct time profiles - - time_profile_matrix = milestoning.make_time_profiles(transition_matrix, time_vector, num_milestones) - xi_bin_time_profiles[i,:,:] = time_profile_matrix[:,:] - - # Use sigma-RAMD to analyze - beta=0.0 # TODO: fill out - calc = sigma_ramd.functional_expansion_1d_ramd( - xi_bin_time_profiles, force_constant=forceRAMD, beta=beta, - min_cv_val=min_location, max_cv_val=max_location, - starting_cv_val=starting_cv_val) - time_estimate = calc.make_second_order_time_estimate() - print("time_estimate:", time_estimate) - -if __name__ == "__main__": - argparser = argparse.ArgumentParser(description=__doc__) - argparser.add_argument( - "ramdLogFiles", metavar="RAMDLOGFILES", type=str, nargs="+", - help="The RAMD log files to open and parse.") - argparser.add_argument( - "-n", "--numMilestones", dest="numMilestones", default=10, - help="The number of milestones to use in the milestoning "\ - "calculation.", type=int) - argparser.add_argument( - "-a", "--numAngleBins", dest="numAngleBins", default=5, - help="The number of bins to divide the force vectors into.", type=int) - - args = argparser.parse_args() - args = vars(args) - ramdLogFiles = args["ramdLogFiles"] - numMilestones = args["numMilestones"] - numAngleBins = args["numAngleBins"] - - analyze_log(ramdLogFiles, numMilestones, numAngleBins) \ No newline at end of file diff --git a/openmm_ramd/analyze/milestoning.py b/openmm_ramd/analyze/milestoning.py deleted file mode 100644 index f12a92f..0000000 --- a/openmm_ramd/analyze/milestoning.py +++ /dev/null @@ -1,109 +0,0 @@ -""" -openmm_ramd/analyze/milestoning.py - -Prepare and utilize a milestoning model for RAMD simulation to use -in a sigma-RAMD analysis. -""" - -import numpy as np - -def uniform_milestone_locations(one_dim_trajs, num_milestones): - """ - Define a set of milestone locations uniformly spread. - """ - # First, locate the minimum and maximum positions along the CV - min_location = 9e9 - max_location = -9e9 - starting_cv_val = None - for i, trajectory in enumerate(one_dim_trajs): - for j, frame in enumerate(trajectory): - [cv_val, xi_val] = frame - if i == 0 and j == 0: - starting_cv_val = cv_val - if cv_val < min_location: - min_location = cv_val - if cv_val > max_location: - max_location = cv_val - - assert min_location < max_location, \ - "The minimum must be less than the maximum." - - assert starting_cv_val is not None, "No starting CV value found." - - span = max_location - min_location - h = span / (num_milestones + 1) - milestones = [] - for i in range(num_milestones): - milestone = h*(i+1) - milestones.append(milestone) - - return milestones, min_location, max_location, starting_cv_val - -def make_milestoning_model( - xi_bin_trajs, milestones, frame_time, num_milestones): - count_matrix = np.zeros((num_milestones, num_milestones), dtype=np.int32) - time_vector = np.zeros((num_milestones, 1)) - - for i, traj in enumerate(xi_bin_trajs): - src_milestone = None - src_time = 0.0 - new_cell_id = None - src_time = -frame_time - for j, frame in enumerate(traj): - [cv_val, xi_val] = frame - # Find what cell it's in - old_cell_id = new_cell_id - new_cell_id = num_milestones - src_time += frame_time - for k, milestone in enumerate(milestones): - if cv_val < milestone: - new_cell_id = k - break - if (old_cell_id is not None) and (new_cell_id != old_cell_id): - # then a milestone transition has happened - diff = new_cell_id - old_cell_id - dest_milestone = new_cell_id - (diff + 1) // 2 - if src_milestone != dest_milestone: - if src_milestone is not None: - assert src_milestone != dest_milestone - count_matrix[dest_milestone, src_milestone] += 1 - time_vector[src_milestone, 0] += src_time - - src_milestone = dest_milestone - src_time = 0.0 - - #print("count_matrix:", count_matrix) - #print("time_vector:", time_vector) - - transition_matrix = np.zeros((num_milestones, num_milestones)) - for i in range(num_milestones): # column index - column_sum = np.sum(count_matrix[:,i]) - for j in range(num_milestones): # row index - if column_sum == 0.0: - transition_matrix[j,i] = 0.0 - else: - transition_matrix[j,i] = count_matrix[j,i] / column_sum - - rate_matrix = np.zeros((num_milestones, num_milestones)) - for i in range(num_milestones): - for j in range(num_milestones): - if time_vector[j,0] == 0.0: - rate_matrix[i,j] = 0.0 - else: - rate_matrix[i,j] = count_matrix[i,j] / time_vector[j,0] - - #print("rate_matrix:", rate_matrix) - - return count_matrix, time_vector, transition_matrix, rate_matrix - -def make_time_profiles(transition_matrix, time_vector, num_milestones): - transit_time_matrix = np.zeros((num_milestones, num_milestones)) - identity = np.identity(num_milestones) - for i in range(num_milestones): - transition_matrix_with_sink = np.copy(transition_matrix[:,:]) - transition_matrix_with_sink[:,i] = 0.0 - geom_series = np.linalg.inv(identity-transition_matrix_with_sink) - result = time_vector.T @ geom_series - transit_time_matrix[i,:] = result[0,:] - - return transit_time_matrix \ No newline at end of file diff --git a/openmm_ramd/analyze/parser.py b/openmm_ramd/analyze/parser.py deleted file mode 100644 index 661047b..0000000 --- a/openmm_ramd/analyze/parser.py +++ /dev/null @@ -1,157 +0,0 @@ -""" -parser.py - -Parse RAMD log files, and use the log files to construct trajectories of -CVs. -""" - -import re -import os - -import numpy as np - -def parse_ramd_log_file(ramd_log_filename): - assert os.path.exists(ramd_log_filename), \ - f"File not found: {ramd_log_filename}." - trajectories = [] - trajectory = [] - n_frames = 0 - relative_COM_x = None - relative_COM_y = None - relative_COM_z = None - ligand_step = None - ligand_COM_x = None - ligand_COM_y = None - ligand_COM_z = None - with open(ramd_log_filename, "r") as f: - for i, line in enumerate(f.readlines()): - new_trajectory_frame = False - new_trajectory = False - # Determine the log output frequency - forceOutFreq_search = re.match("RAMD: forceOutFreq * (\d+)", line) - if forceOutFreq_search: - forceOutFreq = int(forceOutFreq_search.group(1)) - - forceRAMD_search = re.match("RAMD: forceRAMD * (\d+\.\d*)", line) - if forceRAMD_search: - forceRAMD = float(forceRAMD_search.group(1)) - - timeStep_search = re.match("RAMD: timeStep * (\d+\.\d*)", line) - if timeStep_search: - timeStep = float(timeStep_search.group(1)) - - # Find the lines that indicate the ligand COM - lig_com_search = re.match("RAMD FORCE: (\d+) > LIGAND COM IS: \[ *([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) *\]", line) - if lig_com_search: - old_ligand_step = ligand_step - ligand_step = int(lig_com_search.group(1)) - old_ligand_COM_x = ligand_COM_x - old_ligand_COM_y = ligand_COM_y - old_ligand_COM_z = ligand_COM_z - ligand_COM_x = float(lig_com_search.group(2)) - ligand_COM_y = float(lig_com_search.group(3)) - ligand_COM_z = float(lig_com_search.group(4)) - new_trajectory_frame = True - - rec_com_search = re.match("RAMD FORCE: (\d+) > PROTEIN COM IS: \[ *([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) *\]", line) - if rec_com_search: - receptor_step = int(rec_com_search.group(1)) - receptor_COM_x = float(rec_com_search.group(2)) - receptor_COM_y = float(rec_com_search.group(3)) - receptor_COM_z = float(rec_com_search.group(4)) - relative_COM_x = ligand_COM_x - receptor_COM_x - relative_COM_y = ligand_COM_y - receptor_COM_y - relative_COM_z = ligand_COM_z - receptor_COM_z - continue - - accel_search = re.match("RAMD: (\d+) >>> KEEP PREVIOUS ACCELERATION DIRECTION: *\[ *([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) *\]", line) - if accel_search: - accel_step = int(accel_search.group(1)) - accel_COM_x = float(accel_search.group(2)) - accel_COM_y = float(accel_search.group(3)) - accel_COM_z = float(accel_search.group(4)) - continue - - change_accel_search = re.match("RAMD: (\d+) >>> CHANGE ACCELERATION DIRECTION TO: *\[ *([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) +([+-]?\d+\.\d*[eE]?[+-]?\d*) *\]", line) - if change_accel_search: - accel_step = int(change_accel_search.group(1)) - #accel_COM_x = float(change_accel_search.group(2)) - #accel_COM_y = float(change_accel_search.group(3)) - #accel_COM_z = float(change_accel_search.group(4)) - old_ligand_step = ligand_step - new_trajectory_frame = True - new_trajectory = True - - if new_trajectory_frame: - if n_frames > 0: - if relative_COM_x is not None: - assert old_ligand_step == receptor_step, \ - "The lines defining receptor and ligand COM are not the same RAMD step." - frame = [relative_COM_x, relative_COM_y, relative_COM_z, - accel_COM_x, accel_COM_y, accel_COM_z] - else: - frame = [old_ligand_COM_x, old_ligand_COM_y, old_ligand_COM_z, - accel_COM_x, accel_COM_y, accel_COM_z] - - assert old_ligand_step == accel_step, \ - "The lines defining ligand COM and accel vector are not the same RAMD step." - - trajectory.append(frame) - - n_frames += 1 - - if new_trajectory: - - trajectories.append(trajectory) - trajectory = [] - n_frames = 0 - - return trajectories, forceOutFreq, forceRAMD, timeStep - -def condense_trajectories(trajectories): - """ - Reduce trajectories to 1D, and compute xi values - """ - # First, find the average position of all ligand positions, this will - # define our 1D CV - sum_ligx = 0.0 - sum_ligy = 0.0 - sum_ligz = 0.0 - total_points = 0 - - for i, trajectory in enumerate(trajectories): - start_accx = None - for j, frame in enumerate(trajectory): - [ligx, ligy, ligz, accx, accy, accz] = frame - sum_ligx += ligx - sum_ligy += ligy - sum_ligz += ligz - total_points += 1 - if start_accx is None: - start_accx = accx - else: - assert start_accx == start_accx, \ - "Every frame must have the same acceleration vector" - - avg_ligx = sum_ligx / total_points - avg_ligy = sum_ligy / total_points - avg_ligz = sum_ligz / total_points - avg_lig_length = np.sqrt(avg_ligx**2 + avg_ligy**2 + avg_ligz**2) - norm_avgx = avg_ligx / avg_lig_length - norm_avgy = avg_ligy / avg_lig_length - norm_avgz = avg_ligz / avg_lig_length - - one_dim_trajs = [] - for i, trajectory in enumerate(trajectories): - one_dim_traj = [] - for j, frame in enumerate(trajectory): - [ligx, ligy, ligz, accx, accy, accz] = frame - # dot product between frame pos and avg_lig - cv_val = (ligx*norm_avgx + ligy*norm_avgy + ligz*norm_avgz) - # dot product between frame acc and avg_lig - xi_val = (accx*norm_avgx + accy*norm_avgy + accz*norm_avgz) - one_dim_traj.append([cv_val, xi_val]) - - one_dim_trajs.append(one_dim_traj) - - return one_dim_trajs \ No newline at end of file diff --git a/openmm_ramd/analyze/sigma_ramd.py b/openmm_ramd/analyze/sigma_ramd.py deleted file mode 100644 index 03af553..0000000 --- a/openmm_ramd/analyze/sigma_ramd.py +++ /dev/null @@ -1,178 +0,0 @@ -""" -Run a sigma-RAMD calculation on the time profile data generated from -a RAMD log file, or perhaps some other means. -""" - -import numpy as np -import scipy.integrate as sp_int - -class functional_expansion_1d_ramd(): - def __init__(self, xi_bin_time_profiles, force_constant, beta, - min_cv_val, max_cv_val, starting_cv_val): - self.xi_bin_time_profiles = xi_bin_time_profiles - [num_xi_bins, num_milestones, dummy] = xi_bin_time_profiles.shape - #self.num_milestones = num_milestones - self.n = num_milestones - self.n_xi = num_xi_bins - self.A = force_constant - self.beta = beta - self.a = min_cv_val - self.b = starting_cv_val - self.c = max_cv_val - self.h_z = (self.c - self.a) / (self.n-1) - self.xi_a = -1.0 - self.xi_b = 1.0 - self.xi_span = self.xi_b - self.xi_a - self.h_xi = self.xi_span / (self.n_xi-1) - self.z_s = np.linspace(self.a, self.c, self.n) - self.xi_s = np.linspace(self.xi_a, self.xi_b, self.n_xi) - return - - - def make_zeroth_order_terms(self): - # inverse times for each slice of xi - self.flux_array = np.zeros(self.n_xi) - start_index = int((self.b - self.a)/self.h_z) - #print("start_index:", start_index) - for i, xi in enumerate(self.xi_s): - time_start_to_finish = self.xi_bin_time_profiles[i, -1, start_index] - #print("i:", i, "time_start_to_finish:", time_start_to_finish) - self.flux_array[i] = 1.0 / time_start_to_finish - - self.T = self.xi_span / sp_int.simps(self.flux_array, dx=self.h_xi) - #print("T:", self.T) - - def make_first_order_terms(self): - time_integrals_xi = np.zeros(self.n_xi) - for i, xi in enumerate(self.xi_s): - time_span_b_to_z = np.zeros(self.n) - for j, z in enumerate(self.z_s): - time_span_b_to_z[j] = self.xi_bin_time_profiles[i, j, 0] - integrated_time_b_to_z = sp_int.simps(time_span_b_to_z, dx=self.h_z) - - boundary_term1 = (self.c-self.a)*self.xi_bin_time_profiles[i, -1, 0] - - time_span_z_to_c = np.zeros(self.n) - for j, z in enumerate(self.z_s): - time_span_z_to_c[j] = self.xi_bin_time_profiles[i, -1, j] - integrated_time_z_to_c = sp_int.simps(time_span_z_to_c, dx=self.h_z) - - time_integrals_xi[i] \ - = (boundary_term1 - integrated_time_b_to_z \ - - integrated_time_z_to_c) * self.flux_array[i]**2 * xi - - time_integral = sp_int.simps(time_integrals_xi, dx=self.h_xi) \ - * self.T**2 / self.xi_span - self.first_order_correction = -self.beta*self.A*time_integral*self.T - - return - - def make_second_order_terms(self): - time_integrals_xi = np.zeros(self.n_xi) - for i, xi in enumerate(self.xi_s): - # Test the on-diagonal b to z - time_span_on_diag_b_to_z = np.zeros(self.n) - for j1, z in enumerate(self.z_s): - integral_span_inner1 = np.zeros(j1+1) - for j2 in range(j1+1): - - integral_span_inner1[j2] \ - = self.xi_bin_time_profiles[i, j2, 0] - - time_b_to_z = sp_int.simps(integral_span_inner1, dx=self.h_z) - time_span_on_diag_b_to_z[j1] = time_b_to_z - - integrated_time_b_to_z = 2.0 \ - * sp_int.simps(time_span_on_diag_b_to_z, dx=self.h_z) - - # boundary term c**2 - boundary_term_b_to_c1 = (self.c-self.a)**2 \ - * self.xi_bin_time_profiles[i, -1, 0] - - # boundary term 2*c - integral_span_inner1 = np.zeros(self.n) - for j, z in enumerate(self.z_s): - integral_span_inner1[j] = self.xi_bin_time_profiles[i, j, 0] - boundary_term_b_to_c2 = 2.0 * (self.c-self.a) \ - * sp_int.simps(integral_span_inner1, dx=self.h_z) - on_diag_b_to_z = boundary_term_b_to_c1 - boundary_term_b_to_c2 \ - + integrated_time_b_to_z - - # on diagonal z to c - time_span_z_to_c = np.zeros(self.n) - for j1, z in enumerate(self.z_s): - integral_span_inner1 = np.zeros(self.n-j1) - for j2 in range(j1, self.n): - index2 = self.n-j2-1 - - integral_span_inner1[index2] = self.xi_bin_time_profiles[i, -1, j2] - - time_z_to_c = sp_int.simps(integral_span_inner1, dx=self.h_z) - time_span_z_to_c[j1] = time_z_to_c - - integrated_time_z_to_c = 2.0 \ - * sp_int.simps(time_span_z_to_c, dx=self.h_z) - on_diag_est = on_diag_b_to_z + integrated_time_z_to_c - - # Test the off-diagonal b to z - - time_span_off_diag_b_to_z = np.zeros(self.n) - for j1, z in enumerate(self.z_s): - integral_span_inner1 = np.zeros(j1+1) - for j2 in range(j1+1): - - integral_span_inner1[j2] \ - = self.xi_bin_time_profiles[i, j2, 0] - - time_b_to_z = sp_int.simps(integral_span_inner1, dx=self.h_z) - time_span_off_diag_b_to_z[j1] = time_b_to_z - - integrated_time_b_to_z \ - = sp_int.simps(time_span_off_diag_b_to_z, dx=self.h_z) - - # off diagonal boundary term c**2 - boundary_term_b_to_c1 = (self.c-self.a)**2 \ - * self.xi_bin_time_profiles[i, -1, 0] - - # off diagonal boundary term 2*c - integral_span_inner1 = np.zeros(self.n) - for j, z in enumerate(self.z_s): - integral_span_inner1[j] = self.xi_bin_time_profiles[i, j, 0] - boundary_term_b_to_c2 = 2.0 * (self.c-self.a) \ - * sp_int.simps(integral_span_inner1, dx=self.h_z) - - off_diag_b_to_z = boundary_term_b_to_c1 - boundary_term_b_to_c2 \ - + time_b_to_z - - # off diagonal z to c - time_span_z_to_c = np.zeros(self.n) - for j1, z in enumerate(self.z_s): - integral_span_inner1 = np.zeros(self.n-j1) - for j2 in range(j1, self.n): - index2 = self.n-j2-1 - - integral_span_inner1[index2] \ - = self.xi_bin_time_profiles[i, -1, j2] - - time_z_to_c = sp_int.simps(integral_span_inner1, dx=self.h_z) - time_span_z_to_c[j1] = time_z_to_c - - time_z_to_c = sp_int.simps(time_span_z_to_c, dx=self.h_z) - off_diag_est = off_diag_b_to_z + time_z_to_c - time_integrals_xi[i] = (-on_diag_est + off_diag_est) \ - * self.flux_array[i]**2 * xi - - # Down here, integrate over u values - time_integral = sp_int.simps(time_integrals_xi, dx=self.h_xi) \ - * self.T**2 / self.xi_span - #print("time_integral o2:", time_integral) - # TODO: missing a lot of stuff here... - self.second_order_correction = self.beta**2*self.A**2*time_integral/2.0 - - def make_second_order_time_estimate(self): - self.make_zeroth_order_terms() - self.make_first_order_terms() - self.make_second_order_terms() - time_estimate = self.T*np.exp(self.first_order_correction)\ - *np.exp(self.second_order_correction) - return time_estimate \ No newline at end of file